diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index ee21a1eeb2..6f180b1f9e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -35,7 +35,6 @@ import org.apache.calcite.rel.core.Exchange; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; -import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableFunctionScan; @@ -402,24 +401,8 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException QueryBlockInfo right = convertSource(join.getRight()); s = new Schema(left.schema, right.schema); ASTNode cond = join.getCondition().accept(new RexVisitor(s, false, r.getCluster().getRexBuilder())); - boolean semiJoin = join.isSemiJoin(); - if (join.getRight() instanceof Join && !semiJoin) { - // should not be done for semijoin since it will change the semantics - // Invert join inputs; this is done because otherwise the SemanticAnalyzer - // methods to merge joins will not kick in - JoinRelType type; - if (join.getJoinType() == JoinRelType.LEFT) { - type = JoinRelType.RIGHT; - } else if (join.getJoinType() == JoinRelType.RIGHT) { - type = JoinRelType.LEFT; - } else { - type = join.getJoinType(); - } - ast = ASTBuilder.join(right.ast, left.ast, type, cond); - } else { - ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond); - } - if (semiJoin) { + ast = ASTBuilder.join(left.ast, right.ast, join.getJoinType(), cond); + if (join.isSemiJoin()) { s = left.schema; } } else if (r instanceof Union) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java index fe4ecf0aba..97b8c78f4e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelColumnsAlignment; @@ -282,17 +281,7 @@ private static boolean validJoinParent(RelNode joinNode, RelNode parent) { boolean validParent = true; if (parent instanceof Join) { - // In Hive AST, right child of join cannot be another join, - // thus we need to introduce a project on top of it. - // But we only need the additional project if the left child - // is another join too; if it is not, ASTConverter will swap - // the join inputs, leaving the join operator on the left. - // we also do it if parent is HiveSemiJoin since ASTConverter won't - // swap inputs then - // This will help triggering multijoin recognition methods that - // are embedded in SemanticAnalyzer. - if (((Join) parent).getRight() == joinNode && - (((Join) parent).getLeft() instanceof Join || parent instanceof HiveSemiJoin) ) { + if (((Join) parent).getRight() == joinNode) { validParent = false; } } else if (parent instanceof SetOp) { diff --git a/ql/src/test/results/clientpositive/auto_join12.q.out b/ql/src/test/results/clientpositive/auto_join12.q.out index 60a8f4732f..d1201b4824 100644 --- a/ql/src/test/results/clientpositive/auto_join12.q.out +++ b/ql/src/test/results/clientpositive/auto_join12.q.out @@ -25,18 +25,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -56,7 +56,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src TableScan alias: src filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) @@ -73,7 +73,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -101,10 +101,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0,_col3) (type: int) + expressions: hash(_col2,_col1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/auto_join13.q.out b/ql/src/test/results/clientpositive/auto_join13.q.out index 5f83b393fe..867ec7416b 100644 --- a/ql/src/test/results/clientpositive/auto_join13.q.out +++ b/ql/src/test/results/clientpositive/auto_join13.q.out @@ -25,18 +25,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:src Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:src + $hdt$_0:$hdt$_1:$hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -54,9 +54,9 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 _col0 (type: double) - $hdt$_0:$hdt$_1:src + 0 _col0 (type: double) + 1 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + $hdt$_0:$hdt$_1:$hdt$_1:src TableScan alias: src filterExpr: (UDFToDouble(key) < 100.0D) (type: boolean) @@ -73,7 +73,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -99,12 +99,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col1, _col2 + 0 _col0 (type: double) + 1 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col2, _col3 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col2,_col1) (type: int) + expressions: hash(_col3,_col2) (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/auto_join20.q.out b/ql/src/test/results/clientpositive/auto_join20.q.out index 6475ad2ee0..b823d0e61f 100644 --- a/ql/src/test/results/clientpositive/auto_join20.q.out +++ b/ql/src/test/results/clientpositive/auto_join20.q.out @@ -19,24 +19,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-7 , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src1 + $hdt$_0:$hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src1 + $hdt$_0:$hdt$_1:$hdt$_1:src1 TableScan alias: src1 filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) @@ -53,7 +53,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -85,10 +85,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$INTNAME @@ -99,13 +99,13 @@ STAGE PLANS: TableScan HashTable Sink Operator filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -117,17 +117,17 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + expressions: hash(_col3,_col4,_col5,_col6,_col0,_col1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -146,7 +146,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -170,17 +170,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -195,20 +187,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + expressions: hash(_col3,_col4,_col5,_col6,_col0,_col1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -270,24 +270,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-7 , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src1 + $hdt$_0:$hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src1 + $hdt$_0:$hdt$_1:$hdt$_1:src1 TableScan alias: src1 filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) @@ -304,7 +304,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -336,10 +336,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$INTNAME @@ -350,13 +350,13 @@ STAGE PLANS: TableScan HashTable Sink Operator filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -368,17 +368,17 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + expressions: hash(_col3,_col4,_col5,_col6,_col0,_col1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -397,7 +397,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -421,17 +421,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -446,20 +438,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + expressions: hash(_col3,_col4,_col5,_col6,_col0,_col1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/auto_join21.q.out b/ql/src/test/results/clientpositive/auto_join21.q.out index 35a0f32fe2..2854b58f31 100644 --- a/ql/src/test/results/clientpositive/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/auto_join21.q.out @@ -9,24 +9,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-7 , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -42,7 +42,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -73,10 +73,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -87,13 +87,13 @@ STAGE PLANS: TableScan HashTable Sink Operator filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -105,26 +105,30 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -147,17 +151,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -172,24 +168,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/auto_join28.q.out b/ql/src/test/results/clientpositive/auto_join28.q.out index e64539efd4..d3bf63efd3 100644 --- a/ql/src/test/results/clientpositive/auto_join28.q.out +++ b/ql/src/test/results/clientpositive/auto_join28.q.out @@ -9,24 +9,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-7 , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -42,7 +42,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -73,10 +73,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -87,13 +87,13 @@ STAGE PLANS: TableScan HashTable Sink Operator filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -105,26 +105,30 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -147,17 +151,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -172,24 +168,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/auto_join29.q.out b/ql/src/test/results/clientpositive/auto_join29.q.out index 335b6dc52b..77f7af412a 100644 --- a/ql/src/test/results/clientpositive/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/auto_join29.q.out @@ -9,24 +9,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-7 , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -42,7 +42,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -73,10 +73,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -87,13 +87,13 @@ STAGE PLANS: TableScan HashTable Sink Operator filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -105,26 +105,30 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -147,17 +151,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -172,24 +168,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -2833,24 +2841,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-8 depends on stages: Stage-10 - Stage-7 depends on stages: Stage-8 , consists of Stage-9, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6 - Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-6 depends on stages: Stage-7 , consists of Stage-8, Stage-1 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:src1 + $hdt$_1:$hdt$_1:$hdt$_1:src1 TableScan alias: src1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -2866,7 +2874,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2900,10 +2908,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-7 + Stage: Stage-6 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -2914,13 +2922,13 @@ STAGE PLANS: TableScan HashTable Sink Operator filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col2 (type: string) - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -2932,26 +2940,30 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {_col2} + 0 {_col2} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -2974,17 +2986,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -2999,24 +3003,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index 0378c65eae..7da7516d27 100644 --- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -104,14 +104,14 @@ FROM `default`.`orderpayment_small` WHERE `cityid` IS NOT NULL) AS `t8` ON `t2`.`cityid` = `t8`.`cityid`) ON `t0`.`userid` = `t2`.`userid` LIMIT 5 STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -209,7 +209,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date] + /orderpayment_small [$hdt$_1:$hdt$_1:orderpayment, $hdt$_1:$hdt$_2:dim_pay_date] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -239,7 +239,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -348,7 +348,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_3:deal] + /orderpayment_small [$hdt$_1:$hdt$_3:deal] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -379,7 +379,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -488,7 +488,7 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_4:order_city] + /orderpayment_small [$hdt$_1:$hdt$_4:order_city] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -519,20 +519,9 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col4 (type: string), _col5 (type: int) - auto parallelism: false TableScan alias: user filterExpr: userid is not null (type: boolean) @@ -552,8 +541,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 + tag: 0 auto parallelism: false + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col4 (type: string), _col5 (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -636,12 +636,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col5, _col6 Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col4 (type: string), _col5 (type: int) + expressions: _col5 (type: string), _col6 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE Limit diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index d2ca52f75b..d8e5940996 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -320,28 +320,28 @@ POSTHOOK: Input: default@smalltable_n0 POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-11 is a root stage , consists of Stage-13, Stage-14, Stage-1 - Stage-13 has a backup stage: Stage-1 + Stage-10 is a root stage , consists of Stage-12, Stage-13, Stage-2 + Stage-12 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-12 + Stage-11 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-6 depends on stages: Stage-11 + Stage-13 has a backup stage: Stage-2 Stage-9 depends on stages: Stage-13 - Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 - Stage-7 depends on stages: Stage-12 - Stage-14 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-14 - Stage-1 - Stage-0 depends on stages: Stage-7 + Stage-2 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-11 + Stage: Stage-10 Conditional Operator - Stage: Stage-13 + Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:src2 + $hdt$_1:$hdt$_2:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:src2 + $hdt$_1:$hdt$_2:src2 TableScan alias: src2 filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) @@ -358,7 +358,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -390,13 +390,13 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:smalltable_n0 Fetch Operator limit: -1 - $hdt$_3:smalltable2_n0 + $hdt$_1:$hdt$_3:smalltable2_n0 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -414,9 +414,9 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - $hdt$_3:smalltable2_n0 + 0 _col1 (type: double) + 1 (_col1 + _col3) (type: double) + $hdt$_1:$hdt$_3:smalltable2_n0 TableScan alias: smalltable2_n0 filterExpr: UDFToDouble(key) is not null (type: boolean) @@ -433,7 +433,7 @@ STAGE PLANS: 0 (_col1 + _col3) (type: double) 1 _col0 (type: double) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -449,12 +449,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - outputColumnNames: _col0, _col2, _col5 + 0 _col1 (type: double) + 1 (_col1 + _col3) (type: double) + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) + expressions: _col2 (type: string), _col4 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -468,14 +468,14 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-14 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:src1 + $hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:src1 + $hdt$_1:$hdt$_1:src1 TableScan alias: src1 filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) @@ -492,7 +492,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -524,7 +524,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out index 235a2bacc5..7a29d0d8a3 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -178,21 +178,21 @@ POSTHOOK: Input: default@smalltable2 POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-7 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-7 + Stage-9 is a root stage + Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:smalltable Fetch Operator limit: -1 - $hdt$_1:src1 + $hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 - $hdt$_3:smalltable2 + $hdt$_1:$hdt$_3:smalltable2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -210,9 +210,9 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - $hdt$_1:src1 + 0 _col1 (type: double) + 1 (_col1 + _col3) (type: double) + $hdt$_1:$hdt$_1:src1 TableScan alias: src1 filterExpr: (key is not null and UDFToDouble(key) is not null) (type: boolean) @@ -228,7 +228,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_3:smalltable2 + $hdt$_1:$hdt$_3:smalltable2 TableScan alias: smalltable2 filterExpr: UDFToDouble(key) is not null (type: boolean) @@ -245,7 +245,7 @@ STAGE PLANS: 0 (_col1 + _col3) (type: double) 1 _col0 (type: double) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -279,12 +279,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 (_col1 + _col3) (type: double) - 1 _col1 (type: double) - outputColumnNames: _col0, _col2, _col5 + 0 _col1 (type: double) + 1 (_col1 + _col3) (type: double) + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col5 (type: string) + expressions: _col2 (type: string), _col4 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 957 Data size: 181850 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index d64581c66c..be68194839 100644 --- a/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -691,32 +691,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-11 is a root stage , consists of Stage-14, Stage-15, Stage-1 - Stage-14 has a backup stage: Stage-1 - Stage-9 depends on stages: Stage-14 - Stage-8 depends on stages: Stage-1, Stage-9, Stage-10 , consists of Stage-12, Stage-7, Stage-2 - Stage-12 has a backup stage: Stage-2 + Stage-10 is a root stage , consists of Stage-13, Stage-14, Stage-3 + Stage-13 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-13 + Stage-7 depends on stages: Stage-3, Stage-8, Stage-9 , consists of Stage-5, Stage-12, Stage-1 + Stage-5 has a backup stage: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-5, Stage-6 + Stage-12 has a backup stage: Stage-1 Stage-6 depends on stages: Stage-12 - Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 - Stage-7 has a backup stage: Stage-2 - Stage-2 - Stage-15 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-15 Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-14 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-14 + Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-11 + Stage: Stage-10 Conditional Operator - Stage: Stage-14 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:b + $hdt$_1:$hdt$_2:b Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:b + $hdt$_1:$hdt$_2:b TableScan alias: b filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) @@ -733,7 +733,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -764,17 +764,12 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:c + Stage: Stage-5 + Map Reduce + Map Operator Tree: TableScan alias: c filterExpr: value is not null (type: boolean) @@ -786,33 +781,35 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -838,9 +835,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce - Map Operator Tree: + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:c + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:c TableScan alias: c filterExpr: value is not null (type: boolean) @@ -852,41 +854,39 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 0 _col0 (type: string) + 1 _col1 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) TableScan alias: c filterExpr: value is not null (type: boolean) @@ -904,30 +904,42 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 270 Data size: 48060 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:a + $hdt$_1:$hdt$_1:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:a + $hdt$_1:$hdt$_1:a TableScan alias: a filterExpr: ((UDFToDouble(key) > 100.0D) and value is not null) (type: boolean) @@ -944,7 +956,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -975,7 +987,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1043,13 +1055,13 @@ POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL -RUN: Stage-9:MAPRED -RUN: Stage-8:CONDITIONAL +RUN: Stage-10:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-8:MAPRED +RUN: Stage-7:CONDITIONAL RUN: Stage-12:MAPREDLOCAL RUN: Stage-6:MAPRED -RUN: Stage-3:MAPRED +RUN: Stage-2:MAPRED 103 val_103 103 val_103 103 val_103 @@ -1104,13 +1116,13 @@ POSTHOOK: query: select a.* from src a join src b on a.key=b.key join src c on a POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL -RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL +RUN: Stage-10:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-3:MAPRED +RUN: Stage-7:CONDITIONAL RUN: Stage-12:MAPREDLOCAL +RUN: Stage-1:MAPRED RUN: Stage-2:MAPRED -RUN: Stage-3:MAPRED 103 val_103 103 val_103 103 val_103 diff --git a/ql/src/test/results/clientpositive/cbo_const.q.out b/ql/src/test/results/clientpositive/cbo_const.q.out index 33e6f97f2d..b659c35899 100644 --- a/ql/src/test/results/clientpositive/cbo_const.q.out +++ b/ql/src/test/results/clientpositive/cbo_const.q.out @@ -242,12 +242,12 @@ POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -301,17 +301,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string) TableScan alias: y filterExpr: (UDFToDouble(key) = 3.0D) (type: boolean) @@ -330,6 +322,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -337,10 +337,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col4 + outputColumnNames: _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 74a7aa89e7..524d828153 100644 --- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -13,12 +13,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -69,24 +69,20 @@ STAGE PLANS: Filter Operator predicate: ((_col4 and _col2) or _col5) (type: boolean) Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: string) TableScan alias: g filterExpr: (value <> '') (type: boolean) @@ -104,17 +100,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 3 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col3, _col6 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col4 Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col6 (type: string) + expressions: _col4 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -146,12 +150,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -202,24 +206,20 @@ STAGE PLANS: Filter Operator predicate: ((_col4 and _col2) or _col5) (type: boolean) Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: string) TableScan alias: g filterExpr: (value <> '') (type: boolean) @@ -237,17 +237,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col3, _col6 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col4 Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: string), _col6 (type: string) + expressions: _col4 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index dd9fa631b4..5e91f3c22f 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -45,12 +45,12 @@ INNER JOIN (SELECT `key`, `value` FROM `default`.`filter_join_breaktask` WHERE `key` IS NOT NULL AND `value` <> '' AND `ds` = '2008-04-08') AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -148,7 +148,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:$hdt$_1:f, $hdt$_1:$hdt$_2:m] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -178,20 +178,9 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - GatherStats: false - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: int) - auto parallelism: false TableScan alias: g filterExpr: ((value <> '') and (ds = '2008-04-08')) (type: boolean) @@ -211,8 +200,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 + tag: 0 auto parallelism: false + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col0 (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -295,12 +295,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 19 Data size: 1747 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 19 Data size: 1747 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join12.q.out b/ql/src/test/results/clientpositive/join12.q.out index cb668295fe..23bc859d4b 100644 --- a/ql/src/test/results/clientpositive/join12.q.out +++ b/ql/src/test/results/clientpositive/join12.q.out @@ -25,12 +25,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -83,16 +83,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src filterExpr: (UDFToDouble(key) < 80.0D) (type: boolean) @@ -111,6 +104,13 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -118,10 +118,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col3 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join13.q.out b/ql/src/test/results/clientpositive/join13.q.out index 821772be84..74f7923c2c 100644 --- a/ql/src/test/results/clientpositive/join13.q.out +++ b/ql/src/test/results/clientpositive/join13.q.out @@ -25,12 +25,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -84,17 +84,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - Statistics: Num rows: 166 Data size: 43990 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src filterExpr: (UDFToDouble(key) < 200.0D) (type: boolean) @@ -112,17 +104,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + Statistics: Num rows: 166 Data size: 43990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) - 1 _col0 (type: double) - outputColumnNames: _col1, _col2 + 0 _col0 (type: double) + 1 (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + outputColumnNames: _col2, _col3 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join20.q.out b/ql/src/test/results/clientpositive/join20.q.out index 02f71ddae0..fd8f589d23 100644 --- a/ql/src/test/results/clientpositive/join20.q.out +++ b/ql/src/test/results/clientpositive/join20.q.out @@ -11,13 +11,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -72,17 +72,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -97,26 +89,38 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -716,13 +720,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -777,17 +781,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -802,26 +798,38 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join21.q.out b/ql/src/test/results/clientpositive/join21.q.out index 58663573a9..d8db828da0 100644 --- a/ql/src/test/results/clientpositive/join21.q.out +++ b/ql/src/test/results/clientpositive/join21.q.out @@ -9,13 +9,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -68,17 +68,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -93,26 +85,38 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index aaa6bf2cd4..393a3e49e9 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -39,14 +39,14 @@ INNER JOIN (SELECT `key` FROM `default`.`src1` WHERE `key` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`key` = `t4`.`key` STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-0, Stage-4 - Stage-4 depends on stages: Stage-7 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -101,7 +101,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - $hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -121,10 +121,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - $hdt$_2:x + 0 _col0 (type: string) + 1 _col2 (type: string) + Position of Big Table: 1 + $hdt$_1:$hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -144,7 +144,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -173,13 +173,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col1, _col3, _col4 + Position of Big Table: 1 Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + expressions: _col4 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 61 Data size: 16348 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -394,7 +394,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -425,7 +425,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1_n10 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -435,7 +435,7 @@ STAGE PLANS: Table: default.dest_j1_n10 Is Table Level Stats: true - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -453,7 +453,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index b1a559ceea..f893fbe403 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -33,20 +33,20 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Output: default@dest_j1_n11 STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-0, Stage-4 - Stage-4 depends on stages: Stage-7 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z Fetch Operator limit: -1 - $hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -64,9 +64,9 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:x + 0 _col0 (type: string) + 1 _col1 (type: string) + $hdt$_1:$hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -83,7 +83,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -109,12 +109,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1, _col3 Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -154,7 +154,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1_n11 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -162,7 +162,7 @@ STAGE PLANS: Column Types: string, string Table: default.dest_j1_n11 - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index d45e6b9298..1eab910353 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -39,14 +39,14 @@ INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-0, Stage-4 - Stage-4 depends on stages: Stage-7 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -101,7 +101,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - $hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -121,10 +121,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - $hdt$_2:x + 0 _col0 (type: string) + 1 _col3 (type: string) + Position of Big Table: 1 + $hdt$_1:$hdt$_2:x TableScan alias: x filterExpr: (key is not null and value is not null) (type: boolean) @@ -144,7 +144,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -173,13 +173,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col2, _col3 + Position of Big Table: 1 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -394,7 +394,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -425,7 +425,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1_n12 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -435,7 +435,7 @@ STAGE PLANS: Table: default.dest_j1_n12 Is Table Level Stats: true - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -453,7 +453,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 04d995181a..14ce977a57 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -39,14 +39,14 @@ INNER JOIN (SELECT `key`, `value` FROM `default`.`src1` WHERE `key` IS NOT NULL AND `value` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t4`.`value` STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-0 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-0, Stage-4 - Stage-4 depends on stages: Stage-7 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -101,7 +101,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - $hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -121,10 +121,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - $hdt$_2:x + 0 _col0 (type: string) + 1 _col3 (type: string) + Position of Big Table: 1 + $hdt$_1:$hdt$_2:x TableScan alias: x filterExpr: (key is not null and value is not null) (type: boolean) @@ -144,7 +144,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -173,13 +173,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col2, _col3 + Position of Big Table: 1 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -394,7 +394,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator @@ -425,7 +425,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1_n7 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: #### A masked pattern was here #### @@ -435,7 +435,7 @@ STAGE PLANS: Table: default.dest_j1_n7 Is Table Level Stats: true - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -453,7 +453,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/join40.q.out b/ql/src/test/results/clientpositive/join40.q.out index f33ff5a0d6..de37d0cd96 100644 --- a/ql/src/test/results/clientpositive/join40.q.out +++ b/ql/src/test/results/clientpositive/join40.q.out @@ -1778,13 +1778,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1839,17 +1839,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -1864,26 +1856,38 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -2483,13 +2487,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2544,17 +2548,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: src3 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -2569,26 +2565,38 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: boolean) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 + Left Outer Join 0 to 1 filter predicates: - 0 - 1 {VALUE._col1} + 0 {VALUE._col1} + 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 762 Data size: 229264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out index 28fcdeeece..36a9ec2a77 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out @@ -244,7 +244,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name PREHOOK: type: QUERY @@ -256,12 +256,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -316,15 +316,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -337,6 +331,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -347,19 +347,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 702 Data size: 1303614 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) + predicate: ((_col9 + _col0) = _col0) (type: boolean) Statistics: Num rows: 351 Data size: 651807 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 351 Data size: 651807 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 351 Data size: 651807 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -367,7 +363,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name PREHOOK: type: QUERY @@ -379,12 +375,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -439,15 +435,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1234 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -460,6 +450,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -470,7 +466,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 26 Data size: 48178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 48282 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out index d75d553cad..bf3e7ba587 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out @@ -248,7 +248,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name @@ -262,12 +262,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -322,15 +322,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -343,6 +337,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 27 Data size: 33426 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -353,19 +353,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 702 Data size: 1303614 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) + predicate: ((_col9 + _col0) = _col0) (type: boolean) Statistics: Num rows: 351 Data size: 651807 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 351 Data size: 651807 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 351 Data size: 651807 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -373,7 +369,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 where p2.p_partkey = 1 and p3.p_name = p2.p_name @@ -387,12 +383,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -447,15 +443,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1234 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -468,6 +458,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1234 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -478,7 +474,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 26 Data size: 48178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 48282 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out index 18f828af2c..0642606bbb 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out @@ -69,12 +69,12 @@ POSTHOOK: Input: default@part2_n0 POSTHOOK: Input: default@part3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -129,17 +129,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -158,6 +150,14 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -167,17 +167,13 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -200,12 +196,12 @@ POSTHOOK: Input: default@part2_n0 POSTHOOK: Input: default@part3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -260,17 +256,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -289,6 +277,14 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -298,17 +294,13 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -316,7 +308,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part2_n0 p2 join part3_n0 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name PREHOOK: type: QUERY @@ -332,12 +324,12 @@ POSTHOOK: Input: default@part2_n0 POSTHOOK: Input: default@part3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -392,15 +384,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -413,6 +399,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -423,19 +415,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 48152 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) + predicate: ((_col9 + _col0) = _col0) (type: boolean) Statistics: Num rows: 13 Data size: 24076 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 13 Data size: 24076 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 24076 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -443,7 +431,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part2_n0 p2 join part3_n0 p3 on p2_partkey = 1 and p3_name = p2_name PREHOOK: type: QUERY @@ -459,12 +447,12 @@ POSTHOOK: Input: default@part2_n0 POSTHOOK: Input: default@part3_n0 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -519,15 +507,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -540,6 +522,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -550,7 +538,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 26 Data size: 48152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 48152 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out index b491187560..231096c3f9 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual2.q.out @@ -69,13 +69,13 @@ POSTHOOK: Input: default@part2 POSTHOOK: Input: default@part3 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -130,17 +130,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -159,6 +151,14 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -175,17 +175,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col19 (type: string) + key expressions: _col1 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col19 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_name is not null (type: boolean) @@ -209,21 +209,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col19 (type: string) + 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -248,13 +244,13 @@ POSTHOOK: Input: default@part2 POSTHOOK: Input: default@part3 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -309,17 +305,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) @@ -338,6 +326,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -354,17 +350,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col18 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -388,21 +384,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out index 16a13efcdc..41390f2460 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out @@ -71,12 +71,12 @@ POSTHOOK: Input: default@part2_n5 POSTHOOK: Input: default@part3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -131,17 +131,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -160,6 +152,14 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -169,17 +169,13 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -204,12 +200,12 @@ POSTHOOK: Input: default@part2_n5 POSTHOOK: Input: default@part3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -264,17 +260,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -293,6 +281,14 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -302,17 +298,13 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -320,7 +312,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part2_n5 p2 join part3_n2 p3 where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name @@ -338,12 +330,12 @@ POSTHOOK: Input: default@part2_n5 POSTHOOK: Input: default@part3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -398,15 +390,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -419,6 +405,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -429,19 +421,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 48152 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) + predicate: ((_col9 + _col0) = _col0) (type: boolean) Statistics: Num rows: 13 Data size: 24076 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 13 Data size: 24076 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 24076 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -449,7 +437,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from part p1 join part2_n5 p2 join part3_n2 p3 where p2_partkey = 1 and p3_name = p2_name @@ -467,12 +455,12 @@ POSTHOOK: Input: default@part2_n5 POSTHOOK: Input: default@part3_n2 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -527,15 +515,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) TableScan alias: p1 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -548,6 +530,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -558,7 +546,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 26 Data size: 48152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 48152 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out index 2a1f8a4ddc..cd3ccae157 100644 --- a/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/join_cond_pushdown_unqual4.q.out @@ -71,13 +71,13 @@ POSTHOOK: Input: default@part2_n4 POSTHOOK: Input: default@part3_n1 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -132,17 +132,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: p_name is not null (type: boolean) @@ -161,6 +153,14 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -177,17 +177,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col19 (type: string) + key expressions: _col1 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col19 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_name is not null (type: boolean) @@ -211,21 +211,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col19 (type: string) + 0 _col1 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -252,13 +248,13 @@ POSTHOOK: Input: default@part2_n4 POSTHOOK: Input: default@part3_n1 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -313,17 +309,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) TableScan alias: p1 filterExpr: (p_name is not null and p_partkey is not null) (type: boolean) @@ -342,6 +330,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -358,17 +354,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col18 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) TableScan alias: p4 filterExpr: p_partkey is not null (type: boolean) @@ -392,21 +388,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_join21.q.out b/ql/src/test/results/clientpositive/llap/auto_join21.q.out index 21e5e5e09b..dc4355bdaa 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join21.q.out @@ -17,12 +17,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: src1 @@ -42,7 +75,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -53,7 +86,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -74,36 +107,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_join29.q.out b/ql/src/test/results/clientpositive/llap/auto_join29.q.out index dc560dcd49..ef9e945a30 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join29.q.out @@ -17,12 +17,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 3 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: src1 @@ -42,7 +75,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -53,7 +86,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -74,36 +107,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -2722,12 +2726,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Map 4 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: src1 @@ -2748,7 +2785,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -2771,7 +2808,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 1 + 0 Map 3 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -2782,36 +2819,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {_col2} - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 0 Map 2 - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 501 Data size: 89890 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 11b3f4d5c9..d83d90fd65 100644 --- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -1368,11 +1368,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: a @@ -1412,27 +1433,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out index ca585a0562..29ba374300 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out @@ -1608,11 +1608,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: a @@ -1652,27 +1673,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3315,11 +3315,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: a @@ -3359,27 +3380,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out index ba4aa68d50..7da9b96225 100644 --- a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out @@ -759,75 +759,97 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2_n75 + alias: t3_n29 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan - alias: t1_n127 + alias: t2_n75 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan - alias: t3_n29 + alias: t1_n127 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -844,28 +866,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 19238bc173..d3368a6da0 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -2573,26 +2573,28 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) PREHOOK: query: EXPLAIN CBO SELECT diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index 578ea678bc..c59275676a 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -2557,12 +2557,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: x @@ -2598,47 +2620,24 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + expressions: hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - minReductionHashAggr: 0.9655172 + minReductionHashAggr: 0.96428573 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE @@ -2662,7 +2661,25 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2734,12 +2751,33 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: x @@ -2775,27 +2813,6 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2805,28 +2822,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + expressions: hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2840,7 +2839,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2856,6 +2855,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2926,12 +2943,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: x @@ -2967,47 +3006,24 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + expressions: hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 29 Data size: 7801 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) - minReductionHashAggr: 0.9655172 + minReductionHashAggr: 0.96428573 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE @@ -3031,7 +3047,25 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3103,12 +3137,33 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: x @@ -3144,27 +3199,6 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3174,28 +3208,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + expressions: hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 7532 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -3209,7 +3225,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3225,6 +3241,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 4842 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out index e1fed2b0b2..ebe1719f42 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer6.q.out @@ -1932,37 +1932,43 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: xx + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 6 Map Operator Tree: TableScan - alias: x + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -1978,33 +1984,27 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan - alias: y + alias: xx filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -2016,16 +2016,39 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.6005057 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2034,10 +2057,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2056,38 +2079,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.6005057 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -2183,37 +2183,43 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: xx + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 6 Map Operator Tree: TableScan - alias: x + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -2229,33 +2235,27 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan - alias: y + alias: xx filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -2267,16 +2267,39 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.6005057 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 + Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2285,10 +2308,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 39 Data size: 7059 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2307,38 +2330,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 791 Data size: 68817 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - minReductionHashAggr: 0.6005057 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 316 Data size: 30020 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 39 Data size: 6747 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 7076388e18..4017621013 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -625,12 +625,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n1 + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 Map Operator Tree: TableScan alias: srcpart_date_n7 @@ -651,7 +672,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -672,27 +693,6 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -701,24 +701,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() @@ -731,7 +714,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -746,6 +729,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE Stage: Stage-0 Fetch Operator @@ -803,15 +803,36 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Map 7 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 7 (BROADCAST_EDGE) + Map 4 <- Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n1 + filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 Map Operator Tree: TableScan alias: srcpart_date_n7 @@ -832,7 +853,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -868,27 +889,6 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -897,24 +897,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() @@ -927,7 +910,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -942,7 +925,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 191400 Basic stats: PARTIAL Column stats: NONE + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1317,12 +1317,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n1 + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 Map Operator Tree: TableScan alias: srcpart_date_n7 @@ -1344,7 +1365,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -1365,27 +1386,6 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1394,24 +1394,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() @@ -1424,7 +1407,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1439,6 +1422,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1496,21 +1496,42 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 4 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcpart_date_n7 - filterExpr: (key is not null and value is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + alias: alltypesorc_int_n1 + filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_14_srcpart_date_n7_value_min) AND DynamicValue(RS_14_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_14_srcpart_date_n7_value_bloom_filter))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_14_srcpart_date_n7_value_min) AND DynamicValue(RS_14_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_14_srcpart_date_n7_value_bloom_filter))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_date_n7 + filterExpr: (key is not null and value is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n3_key1_min) AND DynamicValue(RS_10_srcpart_small_n3_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n3_key1_bloom_filter))) (type: boolean) Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE @@ -1527,7 +1548,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -1563,27 +1584,6 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 8 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_n7_value_min) AND DynamicValue(RS_12_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_n7_value_bloom_filter))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_n7_value_min) AND DynamicValue(RS_12_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_n7_value_bloom_filter))) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1592,39 +1592,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2200) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() @@ -1637,7 +1605,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1653,6 +1621,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2200) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1665,7 +1665,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2523,12 +2523,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n1 + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_date_n7 @@ -2549,7 +2584,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -2559,7 +2594,7 @@ STAGE PLANS: Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -2580,42 +2615,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2687,12 +2687,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n1 + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_date_n7 @@ -2713,7 +2748,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -2723,7 +2758,7 @@ STAGE PLANS: Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -2744,42 +2779,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 3 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2852,13 +2852,48 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Map 5 <- Map 1 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Map 3 <- Map 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n1 + filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_14_srcpart_date_n7_value_min) AND DynamicValue(RS_14_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_14_srcpart_date_n7_value_bloom_filter))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_14_srcpart_date_n7_value_min) AND DynamicValue(RS_14_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_14_srcpart_date_n7_value_bloom_filter))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_date_n7 @@ -2879,7 +2914,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 5 Statistics: Num rows: 2200 Data size: 391600 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -2904,7 +2939,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -2940,42 +2975,22 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: alltypesorc_int_n1 - filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_n7_value_min) AND DynamicValue(RS_12_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_n7_value_bloom_filter))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_n7_value_min) AND DynamicValue(RS_12_srcpart_date_n7_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_n7_value_bloom_filter))) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cstring (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -2988,7 +3003,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 4 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3001,21 +3016,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3464,12 +3464,44 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n7 + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -3490,7 +3522,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -3513,10 +3545,10 @@ STAGE PLANS: Target Input: srcpart_date_n7 Partition key expr: ds Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE - Target Vertex: Map 3 + Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_small10 @@ -3537,39 +3569,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_date_n7 - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3641,13 +3641,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE) - Map 4 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n7 + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -3668,7 +3700,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -3691,10 +3723,10 @@ STAGE PLANS: Target Input: srcpart_date_n7 Partition key expr: ds Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE - Target Vertex: Map 4 + Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_small10 @@ -3730,52 +3762,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: srcpart_date_n7 - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=10) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 5 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3790,6 +3777,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=10) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -3847,13 +3847,45 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE) - Map 4 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n7 + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -3874,7 +3906,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -3897,10 +3929,10 @@ STAGE PLANS: Target Input: srcpart_date_n7 Partition key expr: ds Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE - Target Vertex: Map 4 + Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_small10 @@ -3936,52 +3968,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: srcpart_date_n7 - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=10) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 5 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -3996,6 +3983,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=10) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator @@ -4032,12 +4032,44 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n7 + filterExpr: ds is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 Map Operator Tree: TableScan alias: srcpart_small_n3 @@ -4058,7 +4090,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) @@ -4081,10 +4113,10 @@ STAGE PLANS: Target Input: srcpart_date_n7 Partition key expr: ds Statistics: Num rows: 22 Data size: 5962 Basic stats: PARTIAL Column stats: NONE - Target Vertex: Map 3 + Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: srcpart_small10 @@ -4105,39 +4137,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: srcpart_date_n7 - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 720000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out index d748aefdb4..9eea9ec1c3 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out @@ -148,7 +148,7 @@ POSTHOOK: Output: default@srcpart_small_n2 POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_1, $hdt$_2, $hdt$_0, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[108][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: EXPLAIN SELECT count(*) FROM ( @@ -208,47 +208,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 8 (BROADCAST_EDGE) - Map 10 <- Reducer 8 (BROADCAST_EDGE) - Map 11 <- Reducer 9 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 9 (BROADCAST_EDGE) + Map 11 <- Reducer 10 (BROADCAST_EDGE) + Map 5 <- Reducer 9 (BROADCAST_EDGE) + Reducer 10 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: alltypesorc_int_n0 - filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE + alias: srcpart_date_n6 + filterExpr: (key is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cstring (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 10 + Map 11 Map Operator Tree: TableScan alias: srcpart_date_n6 - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) (type: boolean) + filterExpr: (key is not null and key BETWEEN DynamicValue(RS_23_srcpart_small_n2_key1_min) AND DynamicValue(RS_23_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_23_srcpart_small_n2_key1_bloom_filter))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) (type: boolean) + predicate: (key is not null and key BETWEEN DynamicValue(RS_23_srcpart_small_n2_key1_min) AND DynamicValue(RS_23_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_23_srcpart_small_n2_key1_bloom_filter))) (type: boolean) Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) @@ -262,28 +262,28 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 11 + Map 5 Map Operator Tree: TableScan - alias: srcpart_date_n6 - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_19_srcpart_small_n2_key1_min) AND DynamicValue(RS_19_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_19_srcpart_small_n2_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + alias: alltypesorc_int_n0 + filterExpr: (cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and key BETWEEN DynamicValue(RS_19_srcpart_small_n2_key1_min) AND DynamicValue(RS_19_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_19_srcpart_small_n2_key1_bloom_filter))) (type: boolean) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n2_key1_min) AND DynamicValue(RS_10_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n2_key1_bloom_filter))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: cstring (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: srcpart_small_n2 @@ -334,44 +334,34 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Reducer 2 - Execution mode: llap + Reducer 10 + Execution mode: vectorized, llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Reducer 3 + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator null sort order: sort order: Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -392,7 +382,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -414,14 +404,37 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator null sort order: sort order: Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE - Reducer 8 + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -439,19 +452,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out index c8fa6252f4..6f066209a8 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out @@ -340,55 +340,55 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 3 llap + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_30] (rows=10091 width=70) - Conds:RS_12._col1=RS_13._col0(Inner) - <-Map 6 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_31] (rows=10091 width=70) + Conds:RS_13._col0=RS_14._col1(Inner) + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=9174 width=70) + Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_28] (rows=9174 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) predicate:cstring is not null - TableScan [TS_6] (rows=12288 width=70) + TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_29] (rows=2200 width=87) + Merge Join Operator [MERGEJOIN_30] (rows=2200 width=87) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] - <-Map 1 [SIMPLE_EDGE] llap + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=2000 width=87) + Select Operator [SEL_5] (rows=2000 width=87) Output:["_col0"] - Filter Operator [FIL_26] (rows=2000 width=87) + Filter Operator [FIL_28] (rows=2000 width=87) predicate:key is not null - TableScan [TS_0] (rows=2000 width=87) + TableScan [TS_3] (rows=2000 width=87) default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Map 5 [SIMPLE_EDGE] llap + <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) + Select Operator [SEL_8] (rows=20 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=20 width=87) + Filter Operator [FIL_29] (rows=20 width=87) predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) + TableScan [TS_6] (rows=20 width=87) default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring) @@ -435,73 +435,73 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 6 (BROADCAST_EDGE) -Map 7 <- Reducer 6 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Map 4 <- Reducer 7 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 3 llap + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_53] (rows=10091 width=70) - Conds:RS_12._col1=RS_13._col0(Inner) - <-Map 7 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_54] (rows=10091 width=70) + Conds:RS_13._col0=RS_14._col1(Inner) + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=9174 width=70) + Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_28] (rows=9174 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) predicate:(cstring is not null and cstring BETWEEN DynamicValue(RS_10_srcpart_small_n4_key1_min) AND DynamicValue(RS_10_srcpart_small_n4_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_n4_key1_bloom_filter))) - TableScan [TS_6] (rows=12288 width=70) + TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] - <-Reducer 6 [BROADCAST_EDGE] llap - BROADCAST [RS_50] - Group By Operator [GBY_32] (rows=1 width=639) + <-Reducer 7 [BROADCAST_EDGE] llap + BROADCAST [RS_39] + Group By Operator [GBY_38] (rows=1 width=639) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_31] - Group By Operator [GBY_30] (rows=1 width=639) + <-Map 6 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_37] + Group By Operator [GBY_36] (rows=1 width=639) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20)"] - Select Operator [SEL_29] (rows=20 width=87) + Select Operator [SEL_35] (rows=20 width=87) Output:["_col0"] - Select Operator [SEL_5] (rows=20 width=87) + Select Operator [SEL_8] (rows=20 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=20 width=87) + Filter Operator [FIL_29] (rows=20 width=87) predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) + TableScan [TS_6] (rows=20 width=87) default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_52] (rows=2200 width=87) + Merge Join Operator [MERGEJOIN_53] (rows=2200 width=87) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] - <-Map 5 [SIMPLE_EDGE] llap + <-Map 6 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_10] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] - <-Map 1 [SIMPLE_EDGE] llap + Please refer to the previous Select Operator [SEL_8] + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=2000 width=87) + Select Operator [SEL_5] (rows=2000 width=87) Output:["_col0"] - Filter Operator [FIL_26] (rows=2000 width=87) + Filter Operator [FIL_28] (rows=2000 width=87) predicate:(key is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n4_key1_min) AND DynamicValue(RS_10_srcpart_small_n4_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n4_key1_bloom_filter))) - TableScan [TS_0] (rows=2000 width=87) + TableScan [TS_3] (rows=2000 width=87) default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 6 [BROADCAST_EDGE] llap - BROADCAST [RS_33] - Please refer to the previous Group By Operator [GBY_32] + <-Reducer 7 [BROADCAST_EDGE] llap + BROADCAST [RS_45] + Please refer to the previous Group By Operator [GBY_38] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_small_n4.key1 = alltypesorc_int_n2.cstring) PREHOOK: type: QUERY @@ -712,55 +712,55 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 3 llap + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_30] (rows=10091 width=70) - Conds:RS_12._col1=RS_13._col0(Inner) - <-Map 6 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_31] (rows=10091 width=70) + Conds:RS_13._col0=RS_14._col1(Inner) + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=9174 width=70) + Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_28] (rows=9174 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) predicate:cstring is not null - TableScan [TS_6] (rows=12288 width=70) + TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_29] (rows=2200 width=178) + Merge Join Operator [MERGEJOIN_30] (rows=2200 width=178) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] - <-Map 1 [SIMPLE_EDGE] llap + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=2000 width=178) + Select Operator [SEL_5] (rows=2000 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=2000 width=178) + Filter Operator [FIL_28] (rows=2000 width=178) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=2000 width=178) + TableScan [TS_3] (rows=2000 width=178) default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 5 [SIMPLE_EDGE] llap + <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) + Select Operator [SEL_8] (rows=20 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=20 width=87) + Filter Operator [FIL_29] (rows=20 width=87) predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) + TableScan [TS_6] (rows=20 width=87) default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring) @@ -807,82 +807,82 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Map 8 <- Reducer 5 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 6 (BROADCAST_EDGE) +Map 4 <- Reducer 8 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 3 llap + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_50] (rows=10091 width=70) - Conds:RS_12._col1=RS_13._col0(Inner) - <-Reducer 2 [SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_12] + Merge Join Operator [MERGEJOIN_51] (rows=10091 width=70) + Conds:RS_13._col0=RS_14._col1(Inner) + <-Reducer 5 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_49] (rows=2200 width=178) + Merge Join Operator [MERGEJOIN_50] (rows=2200 width=178) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col1"] - <-Map 6 [SIMPLE_EDGE] llap + <-Map 7 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) + Select Operator [SEL_8] (rows=20 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=20 width=87) + Filter Operator [FIL_29] (rows=20 width=87) predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) + TableScan [TS_6] (rows=20 width=87) default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] - <-Map 1 [SIMPLE_EDGE] llap + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=2000 width=178) + Select Operator [SEL_5] (rows=2000 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=2000 width=178) + Filter Operator [FIL_28] (rows=2000 width=178) predicate:(key is not null and value is not null and key BETWEEN DynamicValue(RS_10_srcpart_small_n4_key1_min) AND DynamicValue(RS_10_srcpart_small_n4_key1_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_n4_key1_bloom_filter))) - TableScan [TS_0] (rows=2000 width=178) + TableScan [TS_3] (rows=2000 width=178) default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [BROADCAST_EDGE] llap - BROADCAST [RS_33] - Group By Operator [GBY_32] (rows=1 width=639) + <-Reducer 8 [BROADCAST_EDGE] llap + BROADCAST [RS_39] + Group By Operator [GBY_38] (rows=1 width=639) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_31] - Group By Operator [GBY_30] (rows=1 width=639) + <-Map 7 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_37] + Group By Operator [GBY_36] (rows=1 width=639) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20)"] - Select Operator [SEL_29] (rows=20 width=87) + Select Operator [SEL_35] (rows=20 width=87) Output:["_col0"] - Please refer to the previous Select Operator [SEL_5] - <-Map 8 [SIMPLE_EDGE] llap + Please refer to the previous Select Operator [SEL_8] + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=9174 width=70) + Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_28] (rows=9174 width=70) - predicate:(cstring is not null and cstring BETWEEN DynamicValue(RS_12_srcpart_date_n9_value_min) AND DynamicValue(RS_12_srcpart_date_n9_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_n9_value_bloom_filter))) - TableScan [TS_6] (rows=12288 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) + predicate:(cstring is not null and cstring BETWEEN DynamicValue(RS_14_srcpart_date_n9_value_min) AND DynamicValue(RS_14_srcpart_date_n9_value_max) and in_bloom_filter(cstring, DynamicValue(RS_14_srcpart_date_n9_value_bloom_filter))) + TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] - <-Reducer 5 [BROADCAST_EDGE] llap - BROADCAST [RS_48] - Group By Operator [GBY_47] (rows=1 width=552) + <-Reducer 6 [BROADCAST_EDGE] llap + BROADCAST [RS_34] + Group By Operator [GBY_33] (rows=1 width=552) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=2200)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_46] - Group By Operator [GBY_45] (rows=1 width=552) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=1 width=552) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=2200)"] - Select Operator [SEL_44] (rows=2200 width=178) + Select Operator [SEL_30] (rows=2200 width=178) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_49] + Please refer to the previous Merge Join Operator [MERGEJOIN_50] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring) PREHOOK: type: QUERY @@ -1459,49 +1459,49 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE) -Map 3 <- Map 1 (BROADCAST_EDGE) -Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Map 3 (BROADCAST_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 2 llap + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_30] (rows=10091 width=70) - Conds:RS_12._col1=SEL_8._col0(Inner) - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_12] + Map Join Operator [MAPJOIN_31] (rows=10091 width=70) + Conds:SEL_2._col0=RS_14._col1(Inner) + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_14] PartitionCols:_col1 - Map Join Operator [MAPJOIN_29] (rows=2200 width=178) - Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1"] - <-Map 2 [BROADCAST_EDGE] llap + Map Join Operator [MAPJOIN_30] (rows=2200 width=178) + Conds:SEL_5._col0=RS_10._col0(Inner),Output:["_col1"] + <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) + Select Operator [SEL_8] (rows=20 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=20 width=87) + Filter Operator [FIL_29] (rows=20 width=87) predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) + TableScan [TS_6] (rows=20 width=87) default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] - <-Select Operator [SEL_2] (rows=2000 width=178) + <-Select Operator [SEL_5] (rows=2000 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=2000 width=178) + Filter Operator [FIL_28] (rows=2000 width=178) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=2000 width=178) + TableScan [TS_3] (rows=2000 width=178) default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_8] (rows=9174 width=70) + <-Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_28] (rows=9174 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) predicate:cstring is not null - TableScan [TS_6] (rows=12288 width=70) + TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring) @@ -1548,49 +1548,49 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE) -Map 3 <- Map 1 (BROADCAST_EDGE) -Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Map 3 (BROADCAST_EDGE) +Map 3 <- Map 4 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 llap - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 2 llap + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Map Join Operator [MAPJOIN_50] (rows=10091 width=70) - Conds:RS_12._col1=SEL_8._col0(Inner) - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_12] + Map Join Operator [MAPJOIN_51] (rows=10091 width=70) + Conds:SEL_2._col0=RS_14._col1(Inner) + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_14] PartitionCols:_col1 - Map Join Operator [MAPJOIN_49] (rows=2200 width=178) - Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col1"] - <-Map 2 [BROADCAST_EDGE] llap + Map Join Operator [MAPJOIN_50] (rows=2200 width=178) + Conds:SEL_5._col0=RS_10._col0(Inner),Output:["_col1"] + <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=20 width=87) + Select Operator [SEL_8] (rows=20 width=87) Output:["_col0"] - Filter Operator [FIL_27] (rows=20 width=87) + Filter Operator [FIL_29] (rows=20 width=87) predicate:key1 is not null - TableScan [TS_3] (rows=20 width=87) + TableScan [TS_6] (rows=20 width=87) default@srcpart_small_n4,srcpart_small_n4,Tbl:PARTIAL,Col:PARTIAL,Output:["key1"] - <-Select Operator [SEL_2] (rows=2000 width=178) + <-Select Operator [SEL_5] (rows=2000 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=2000 width=178) + Filter Operator [FIL_28] (rows=2000 width=178) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=2000 width=178) + TableScan [TS_3] (rows=2000 width=178) default@srcpart_date_n9,srcpart_date_n9,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_8] (rows=9174 width=70) + <-Select Operator [SEL_2] (rows=9174 width=70) Output:["_col0"] - Filter Operator [FIL_28] (rows=9174 width=70) + Filter Operator [FIL_27] (rows=9174 width=70) predicate:cstring is not null - TableScan [TS_6] (rows=12288 width=70) + TableScan [TS_0] (rows=12288 width=70) default@alltypesorc_int_n2,alltypesorc_int_n2,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"] PREHOOK: query: select count(*) from srcpart_date_n9 join srcpart_small_n4 on (srcpart_date_n9.key = srcpart_small_n4.key1) join alltypesorc_int_n2 on (srcpart_date_n9.value = alltypesorc_int_n2.cstring) diff --git a/ql/src/test/results/clientpositive/llap/estimate_pkfk_filtered_fk.q.out b/ql/src/test/results/clientpositive/llap/estimate_pkfk_filtered_fk.q.out index a9facbfac1..be5974c27f 100644 --- a/ql/src/test/results/clientpositive/llap/estimate_pkfk_filtered_fk.q.out +++ b/ql/src/test/results/clientpositive/llap/estimate_pkfk_filtered_fk.q.out @@ -180,49 +180,51 @@ POSTHOOK: Input: default@torpedos Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_46] (rows=101/10 width=4) - Conds:RS_12._col0=RS_55._col0(Inner),Output:["_col0"] - <-Map 5 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=1000/1000 width=4) - Output:["_col0"] - Filter Operator [FIL_53] (rows=1000/1000 width=4) - predicate:ship_id is not null - TableScan [TS_6] (rows=1000/1000 width=4) - default@torpedos,t,Tbl:COMPLETE,Col:COMPLETE,Output:["ship_id"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=2/1 width=4) - Conds:RS_49._col1=RS_52._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_49] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=10/10 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=10/10 width=12) - predicate:((crew_size = 2) and ship_type_id is not null and id is not null) - TableScan [TS_0] (rows=100/100 width=12) - default@ships,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","ship_type_id","crew_size"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=1/1 width=4) - Output:["_col0"] - Filter Operator [FIL_50] (rows=1/1 width=99) - predicate:((type_name = 'galaxy class') and id is not null) - TableScan [TS_3] (rows=10/10 width=99) - default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id","type_name"] + Reducer 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=101/10 width=4) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_47] (rows=101/10 width=4) + Conds:RS_50._col0=RS_14._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_49] (rows=1000/1000 width=4) + Output:["_col0"] + Filter Operator [FIL_48] (rows=1000/1000 width=4) + predicate:ship_id is not null + TableScan [TS_0] (rows=1000/1000 width=4) + default@torpedos,t,Tbl:COMPLETE,Col:COMPLETE,Output:["ship_id"] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_46] (rows=2/1 width=4) + Conds:RS_53._col1=RS_56._col0(Inner),Output:["_col0"] + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_52] (rows=10/10 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_51] (rows=10/10 width=12) + predicate:((crew_size = 2) and ship_type_id is not null and id is not null) + TableScan [TS_3] (rows=100/100 width=12) + default@ships,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","ship_type_id","crew_size"] + <-Map 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=1/1 width=4) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1/1 width=99) + predicate:((type_name = 'galaxy class') and id is not null) + TableScan [TS_6] (rows=10/10 width=99) + default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id","type_name"] PREHOOK: query: select s.id diff --git a/ql/src/test/results/clientpositive/llap/estimate_pkfk_nocond.q.out b/ql/src/test/results/clientpositive/llap/estimate_pkfk_nocond.q.out index bdd8c08523..411ebfd205 100644 --- a/ql/src/test/results/clientpositive/llap/estimate_pkfk_nocond.q.out +++ b/ql/src/test/results/clientpositive/llap/estimate_pkfk_nocond.q.out @@ -176,47 +176,49 @@ POSTHOOK: Input: default@torpedos Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_46] (rows=101/100 width=4) - Conds:RS_12._col0=RS_55._col0(Inner),Output:["_col0"] - <-Map 5 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=1000/1000 width=4) - Output:["_col0"] - Filter Operator [FIL_53] (rows=1000/1000 width=4) - predicate:ship_id is not null - TableScan [TS_6] (rows=1000/1000 width=4) - default@torpedos,t,Tbl:COMPLETE,Col:COMPLETE,Output:["ship_id"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=11/10 width=4) - Conds:RS_49._col1=RS_52._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_49] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=100/100 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=100/100 width=8) - predicate:(ship_type_id is not null and id is not null) - TableScan [TS_0] (rows=100/100 width=8) - default@ships,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","ship_type_id"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=1/1 width=4) - Output:["_col0"] - Filter Operator [FIL_50] (rows=1/1 width=99) - predicate:((type_name = 'galaxy class') and id is not null) - TableScan [TS_3] (rows=10/10 width=99) - default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id","type_name"] + Reducer 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=101/100 width=4) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_47] (rows=101/100 width=4) + Conds:RS_50._col0=RS_14._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_49] (rows=1000/1000 width=4) + Output:["_col0"] + Filter Operator [FIL_48] (rows=1000/1000 width=4) + predicate:ship_id is not null + TableScan [TS_0] (rows=1000/1000 width=4) + default@torpedos,t,Tbl:COMPLETE,Col:COMPLETE,Output:["ship_id"] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_46] (rows=11/10 width=4) + Conds:RS_53._col1=RS_56._col0(Inner),Output:["_col0"] + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_52] (rows=100/100 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_51] (rows=100/100 width=8) + predicate:(ship_type_id is not null and id is not null) + TableScan [TS_3] (rows=100/100 width=8) + default@ships,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","ship_type_id"] + <-Map 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=1/1 width=4) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1/1 width=99) + predicate:((type_name = 'galaxy class') and id is not null) + TableScan [TS_6] (rows=10/10 width=99) + default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id","type_name"] diff --git a/ql/src/test/results/clientpositive/llap/estimate_pkfk_push.q.out b/ql/src/test/results/clientpositive/llap/estimate_pkfk_push.q.out index 2ba22d68bc..ef788024ea 100644 --- a/ql/src/test/results/clientpositive/llap/estimate_pkfk_push.q.out +++ b/ql/src/test/results/clientpositive/llap/estimate_pkfk_push.q.out @@ -176,47 +176,49 @@ POSTHOOK: Input: default@torpedos Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_46] (rows=201/200 width=4) - Conds:RS_12._col0=RS_55._col0(Inner),Output:["_col0"] - <-Map 5 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=1000/1000 width=4) - Output:["_col0"] - Filter Operator [FIL_53] (rows=1000/1000 width=4) - predicate:ship_id is not null - TableScan [TS_6] (rows=1000/1000 width=4) - default@torpedos,t,Tbl:COMPLETE,Col:COMPLETE,Output:["ship_id"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=20/20 width=4) - Conds:RS_49._col1=RS_52._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_49] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=20/20 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=20/20 width=8) - predicate:((ship_type_id) IN (1, 2) and id is not null) - TableScan [TS_0] (rows=100/100 width=8) - default@ships,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","ship_type_id"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=2/2 width=4) - Output:["_col0"] - Filter Operator [FIL_50] (rows=2/2 width=4) - predicate:(id) IN (1, 2) - TableScan [TS_3] (rows=10/10 width=4) - default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] + Reducer 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=201/200 width=4) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_47] (rows=201/200 width=4) + Conds:RS_50._col0=RS_14._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_50] + PartitionCols:_col0 + Select Operator [SEL_49] (rows=1000/1000 width=4) + Output:["_col0"] + Filter Operator [FIL_48] (rows=1000/1000 width=4) + predicate:ship_id is not null + TableScan [TS_0] (rows=1000/1000 width=4) + default@torpedos,t,Tbl:COMPLETE,Col:COMPLETE,Output:["ship_id"] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_46] (rows=20/20 width=4) + Conds:RS_53._col1=RS_56._col0(Inner),Output:["_col0"] + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_52] (rows=20/20 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_51] (rows=20/20 width=8) + predicate:((ship_type_id) IN (1, 2) and id is not null) + TableScan [TS_3] (rows=100/100 width=8) + default@ships,s,Tbl:COMPLETE,Col:COMPLETE,Output:["id","ship_type_id"] + <-Map 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_55] (rows=2/2 width=4) + Output:["_col0"] + Filter Operator [FIL_54] (rows=2/2 width=4) + predicate:(id) IN (1, 2) + TableScan [TS_6] (rows=10/10 width=4) + default@ship_types,st,Tbl:COMPLETE,Col:COMPLETE,Output:["id"] diff --git a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out index 36bd120028..29bf6c3a41 100644 --- a/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out @@ -47,129 +47,129 @@ POSTHOOK: Input: default@src1 Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Union 10 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) Map 13 <- Union 14 (CONTAINS) Map 16 <- Union 14 (CONTAINS) -Map 9 <- Union 10 (CONTAINS) -Reducer 11 <- Union 10 (SIMPLE_EDGE) +Map 7 <- Union 2 (CONTAINS) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 15 <- Union 14 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE) -Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 7 <- Reducer 15 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 llap - File Output Operator [FS_56] - Group By Operator [GBY_54] (rows=132/15 width=268) + Reducer 6 llap + File Output Operator [FS_58] + Group By Operator [GBY_56] (rows=132/15 width=268) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_126] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] llap + Reduce Output Operator [RS_142] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_124] (rows=66/61 width=268) + Select Operator [SEL_140] (rows=66/61 width=268) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_123] (rows=66/61 width=268) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2","_col4"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] + Merge Join Operator [MERGEJOIN_139] (rows=66/61 width=268) + Conds:RS_48._col0=RS_49._col3(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_49] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_122] (rows=39/37 width=266) + Conds:RS_44._col0=RS_45._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_74] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_15] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_73] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_12] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_48] PartitionCols:_col0 - Select Operator [SEL_17] (rows=525/319 width=91) + Select Operator [SEL_37] (rows=525/319 width=91) Output:["_col0"] - Group By Operator [GBY_16] (rows=525/319 width=178) + Group By Operator [GBY_36] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 10 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] llap - Reduce Output Operator [RS_140] + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] llap + Reduce Output Operator [RS_147] PartitionCols:_col1, _col0 - Select Operator [SEL_138] (rows=500/500 width=178) + Select Operator [SEL_145] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_137] (rows=500/500 width=178) + Filter Operator [FIL_144] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_136] (rows=500/500 width=178) + TableScan [TS_143] (rows=25/25 width=175) Output:["key","value"] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_135] + <-Map 16 [CONTAINS] llap + Reduce Output Operator [RS_152] PartitionCols:_col1, _col0 - Select Operator [SEL_133] (rows=25/25 width=175) + Select Operator [SEL_150] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_132] (rows=25/25 width=175) + Filter Operator [FIL_149] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_131] (rows=25/25 width=175) + TableScan [TS_148] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_119] (rows=39/37 width=266) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_69] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_70] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [CONTAINS] llap - Reduce Output Operator [RS_130] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_133] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_128] (rows=66/61 width=268) + Select Operator [SEL_131] (rows=66/61 width=268) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_127] (rows=66/61 width=268) - Conds:RS_46._col3=RS_47._col0(Inner),Output:["_col1","_col2","_col4"] - <-Reducer 15 [SIMPLE_EDGE] llap - SHUFFLE [RS_47] + Merge Join Operator [MERGEJOIN_130] (rows=66/61 width=268) + Conds:RS_22._col0=RS_23._col3(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] PartitionCols:_col0 - Select Operator [SEL_42] (rows=525/319 width=91) + Select Operator [SEL_11] (rows=525/319 width=91) Output:["_col0"] - Group By Operator [GBY_41] (rows=525/319 width=178) + Group By Operator [GBY_10] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] llap - Reduce Output Operator [RS_145] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_129] PartitionCols:_col1, _col0 - Select Operator [SEL_143] (rows=25/25 width=175) + Select Operator [SEL_127] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_142] (rows=25/25 width=175) + Filter Operator [FIL_126] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_141] (rows=25/25 width=175) + TableScan [TS_125] (rows=25/25 width=175) Output:["key","value"] - <-Map 16 [CONTAINS] llap - Reduce Output Operator [RS_150] + <-Map 7 [CONTAINS] llap + Reduce Output Operator [RS_138] PartitionCols:_col1, _col0 - Select Operator [SEL_148] (rows=500/500 width=178) + Select Operator [SEL_136] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_147] (rows=500/500 width=178) + Filter Operator [FIL_135] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_146] (rows=500/500 width=178) + TableScan [TS_134] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_46] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_120] (rows=39/37 width=266) - Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_43] + Merge Join Operator [MERGEJOIN_121] (rows=39/37 width=266) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_2] + Please refer to the previous Select Operator [SEL_17] <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_44] + SHUFFLE [RS_18] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] + Please refer to the previous Select Operator [SEL_14] PREHOOK: query: SELECT x.key, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -236,8 +236,7 @@ POSTHOOK: Input: default@src1 Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Union 13 (CONTAINS) -Map 15 <- Union 13 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) Map 16 <- Union 17 (CONTAINS) Map 21 <- Union 17 (CONTAINS) Map 22 <- Union 19 (CONTAINS) @@ -245,225 +244,226 @@ Map 23 <- Union 24 (CONTAINS) Map 30 <- Union 24 (CONTAINS) Map 31 <- Union 26 (CONTAINS) Map 32 <- Union 28 (CONTAINS) -Reducer 10 <- Reducer 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 14 <- Union 13 (SIMPLE_EDGE) +Map 9 <- Union 2 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 13 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 20 <- Union 19 (SIMPLE_EDGE) Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) Reducer 29 <- Union 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 llap - File Output Operator [FS_114] - Group By Operator [GBY_112] (rows=384/15 width=177) + Reducer 8 llap + File Output Operator [FS_117] + Group By Operator [GBY_115] (rows=384/15 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 5 [CONTAINS] llap - Reduce Output Operator [RS_229] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + Reduce Output Operator [RS_246] PartitionCols:_col0, _col1 - Group By Operator [GBY_227] (rows=196/15 width=177) + Select Operator [SEL_244] (rows=193/61 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_243] (rows=193/61 width=177) + Conds:RS_107._col0=RS_108._col3(Inner),Output:["_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_108] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_220] (rows=39/37 width=266) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_141] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_12] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_142] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_15] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 29 [SIMPLE_EDGE] llap + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_96] (rows=1525/319 width=91) + Output:["_col0"] + Group By Operator [GBY_95] (rows=1525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 28 [SIMPLE_EDGE] + <-Map 32 [CONTAINS] llap + Reduce Output Operator [RS_297] + PartitionCols:_col1, _col0 + Select Operator [SEL_295] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_294] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_293] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 27 [CONTAINS] llap + Reduce Output Operator [RS_282] + PartitionCols:_col1, _col0 + Select Operator [SEL_280] (rows=1025/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_279] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 26 [SIMPLE_EDGE] + <-Map 31 [CONTAINS] llap + Reduce Output Operator [RS_292] + PartitionCols:_col1, _col0 + Select Operator [SEL_290] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_289] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_288] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 25 [CONTAINS] llap + Reduce Output Operator [RS_278] + PartitionCols:_col1, _col0 + Select Operator [SEL_276] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_275] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 24 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] llap + Reduce Output Operator [RS_274] + PartitionCols:_col1, _col0 + Select Operator [SEL_272] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_271] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_270] (rows=25/25 width=175) + Output:["key","value"] + <-Map 30 [CONTAINS] llap + Reduce Output Operator [RS_287] + PartitionCols:_col1, _col0 + Select Operator [SEL_285] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_284] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_283] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 6 [CONTAINS] llap + Reduce Output Operator [RS_237] + PartitionCols:_col0, _col1 + Group By Operator [GBY_235] (rows=196/15 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 10 [CONTAINS] llap - Reduce Output Operator [RS_237] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] llap + Reduce Output Operator [RS_250] PartitionCols:_col0, _col1 - Select Operator [SEL_235] (rows=130/61 width=177) + Select Operator [SEL_248] (rows=130/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_234] (rows=130/61 width=177) - Conds:RS_55._col3=RS_56._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_247] (rows=130/61 width=177) + Conds:RS_57._col0=RS_58._col3(Inner),Output:["_col2","_col3"] + <-Reducer 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_221] (rows=39/37 width=266) + Conds:RS_53._col0=RS_54._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_53] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_14] + <-Map 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_54] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_17] <-Reducer 20 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] + SHUFFLE [RS_57] PartitionCols:_col0 - Select Operator [SEL_51] (rows=1025/319 width=91) + Select Operator [SEL_46] (rows=1025/319 width=91) Output:["_col0"] - Group By Operator [GBY_50] (rows=1025/319 width=178) + Group By Operator [GBY_45] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 19 [SIMPLE_EDGE] <-Map 22 [CONTAINS] llap - Reduce Output Operator [RS_266] + Reduce Output Operator [RS_269] PartitionCols:_col1, _col0 - Select Operator [SEL_264] (rows=500/500 width=178) + Select Operator [SEL_267] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_263] (rows=500/500 width=178) + Filter Operator [FIL_266] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_262] (rows=500/500 width=178) + TableScan [TS_265] (rows=500/500 width=178) Output:["key","value"] <-Reducer 18 [CONTAINS] llap - Reduce Output Operator [RS_256] + Reduce Output Operator [RS_259] PartitionCols:_col1, _col0 - Select Operator [SEL_254] (rows=525/319 width=178) + Select Operator [SEL_257] (rows=525/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_253] (rows=525/319 width=178) + Group By Operator [GBY_256] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 17 [SIMPLE_EDGE] <-Map 16 [CONTAINS] llap - Reduce Output Operator [RS_252] + Reduce Output Operator [RS_255] PartitionCols:_col1, _col0 - Select Operator [SEL_250] (rows=25/25 width=175) + Select Operator [SEL_253] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=25/25 width=175) + Filter Operator [FIL_252] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_248] (rows=25/25 width=175) + TableScan [TS_251] (rows=25/25 width=175) Output:["key","value"] <-Map 21 [CONTAINS] llap - Reduce Output Operator [RS_261] + Reduce Output Operator [RS_264] PartitionCols:_col1, _col0 - Select Operator [SEL_259] (rows=500/500 width=178) + Select Operator [SEL_262] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_258] (rows=500/500 width=178) + Filter Operator [FIL_261] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_257] (rows=500/500 width=178) + TableScan [TS_260] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_218] (rows=39/37 width=266) - Conds:RS_52._col0=RS_53._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_52] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_136] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_53] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_137] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_226] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_234] PartitionCols:_col0, _col1 - Select Operator [SEL_224] (rows=66/61 width=177) + Select Operator [SEL_232] (rows=66/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_223] (rows=66/61 width=177) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] + Merge Join Operator [MERGEJOIN_231] (rows=66/61 width=177) + Conds:RS_22._col0=RS_23._col3(Inner),Output:["_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_217] (rows=39/37 width=266) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_2] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] - <-Reducer 14 [SIMPLE_EDGE] llap + Please refer to the previous Merge Join Operator [MERGEJOIN_220] + <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_22] PartitionCols:_col0 - Select Operator [SEL_17] (rows=525/319 width=91) + Select Operator [SEL_11] (rows=525/319 width=91) Output:["_col0"] - Group By Operator [GBY_16] (rows=525/319 width=178) + Group By Operator [GBY_10] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 13 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] llap - Reduce Output Operator [RS_242] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_230] PartitionCols:_col1, _col0 - Select Operator [SEL_240] (rows=25/25 width=175) + Select Operator [SEL_228] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_239] (rows=25/25 width=175) + Filter Operator [FIL_227] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_238] (rows=25/25 width=175) + TableScan [TS_226] (rows=25/25 width=175) Output:["key","value"] - <-Map 15 [CONTAINS] llap - Reduce Output Operator [RS_247] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_242] PartitionCols:_col1, _col0 - Select Operator [SEL_245] (rows=500/500 width=178) + Select Operator [SEL_240] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_244] (rows=500/500 width=178) + Filter Operator [FIL_239] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_243] (rows=500/500 width=178) + TableScan [TS_238] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 8 [CONTAINS] llap - Reduce Output Operator [RS_233] - PartitionCols:_col0, _col1 - Select Operator [SEL_231] (rows=193/61 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_230] (rows=193/61 width=177) - Conds:RS_104._col3=RS_105._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_104] - PartitionCols:_col3 - Please refer to the previous Merge Join Operator [MERGEJOIN_217] - <-Reducer 29 [SIMPLE_EDGE] llap - SHUFFLE [RS_105] - PartitionCols:_col0 - Select Operator [SEL_100] (rows=1525/319 width=91) - Output:["_col0"] - Group By Operator [GBY_99] (rows=1525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 32 [CONTAINS] llap - Reduce Output Operator [RS_294] - PartitionCols:_col1, _col0 - Select Operator [SEL_292] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_290] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 27 [CONTAINS] llap - Reduce Output Operator [RS_279] - PartitionCols:_col1, _col0 - Select Operator [SEL_277] (rows=1025/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_276] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 31 [CONTAINS] llap - Reduce Output Operator [RS_289] - PartitionCols:_col1, _col0 - Select Operator [SEL_287] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_286] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_285] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 25 [CONTAINS] llap - Reduce Output Operator [RS_275] - PartitionCols:_col1, _col0 - Select Operator [SEL_273] (rows=525/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_272] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] llap - Reduce Output Operator [RS_271] - PartitionCols:_col1, _col0 - Select Operator [SEL_269] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_268] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_267] (rows=25/25 width=175) - Output:["key","value"] - <-Map 30 [CONTAINS] llap - Reduce Output Operator [RS_284] - PartitionCols:_col1, _col0 - Select Operator [SEL_282] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_281] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_280] (rows=500/500 width=178) - Output:["key","value"] PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n11(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -1181,24 +1181,24 @@ POSTHOOK: Output: default@c_n3 Plan optimized by CBO. Vertex dependency in root stage -Map 13 <- Union 14 (CONTAINS) -Map 15 <- Union 14 (CONTAINS) -Map 17 <- Union 18 (CONTAINS) -Map 19 <- Union 18 (CONTAINS) -Map 20 <- Union 18 (CONTAINS) -Map 22 <- Union 23 (CONTAINS) -Map 24 <- Union 23 (CONTAINS) -Map 25 <- Union 23 (CONTAINS) -Map 26 <- Union 23 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Map 16 <- Union 17 (CONTAINS) +Map 18 <- Union 17 (CONTAINS) +Map 19 <- Union 17 (CONTAINS) +Map 21 <- Union 22 (CONTAINS) +Map 23 <- Union 22 (CONTAINS) +Map 24 <- Union 22 (CONTAINS) +Map 25 <- Union 22 (CONTAINS) +Map 8 <- Union 2 (CONTAINS) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 20 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Union 4 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Union 4 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 18 (SIMPLE_EDGE), Union 4 (CONTAINS) Stage-5 Stats Work{} @@ -1209,240 +1209,240 @@ Stage-5 Dependency Collection{} Stage-3 Reducer 5 llap - File Output Operator [FS_81] - Group By Operator [GBY_79] (rows=1/1 width=880) + File Output Operator [FS_84] + Group By Operator [GBY_82] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <-Union 4 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] llap - File Output Operator [FS_233] + File Output Operator [FS_234] table:{"name:":"default.a_n14"} - Select Operator [SEL_231] (rows=193/820 width=175) + Select Operator [SEL_232] (rows=2640/5421 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_230] (rows=193/820 width=175) - Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_231] (rows=2640/5421 width=178) + Conds:Union 17._col0=RS_44._col1(Inner),Output:["_col1","_col4"] <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_69] + SHUFFLE [RS_44] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_202] (rows=39/115 width=264) - Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_66] + Merge Join Operator [MERGEJOIN_204] (rows=791/1028 width=269) + Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) + Select Operator [SEL_35] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=25/25 width=175) + Filter Operator [FIL_129] (rows=500/500 width=178) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_67] + TableScan [TS_8] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 20 [SIMPLE_EDGE] llap + SHUFFLE [RS_40] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25/25 width=175) + Select Operator [SEL_38] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_129] (rows=25/25 width=175) + Filter Operator [FIL_130] (rows=500/500 width=178) predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 23 [SIMPLE_EDGE] - <-Map 22 [CONTAINS] llap - Reduce Output Operator [RS_271] - PartitionCols:_col0 - Select Operator [SEL_269] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_268] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_267] (rows=25/25 width=89) - Output:["value"] - <-Map 24 [CONTAINS] llap - Reduce Output Operator [RS_276] + TableScan [TS_36] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] llap + Reduce Output Operator [RS_259] PartitionCols:_col0 - Select Operator [SEL_274] (rows=500/500 width=91) + Select Operator [SEL_257] (rows=25/25 width=89) Output:["_col0"] - Filter Operator [FIL_273] (rows=500/500 width=91) + Filter Operator [FIL_256] (rows=25/25 width=89) predicate:value is not null - TableScan [TS_272] (rows=500/500 width=91) + TableScan [TS_255] (rows=25/25 width=89) Output:["value"] - <-Map 25 [CONTAINS] llap - Reduce Output Operator [RS_281] + <-Map 18 [CONTAINS] llap + Reduce Output Operator [RS_264] PartitionCols:_col0 - Select Operator [SEL_279] (rows=500/500 width=91) + Select Operator [SEL_262] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_278] (rows=500/500 width=91) + Filter Operator [FIL_261] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_277] (rows=500/500 width=91) + TableScan [TS_260] (rows=500/500 width=91) Output:["value"] - <-Map 26 [CONTAINS] llap - Reduce Output Operator [RS_286] + <-Map 19 [CONTAINS] llap + Reduce Output Operator [RS_269] PartitionCols:_col0 - Select Operator [SEL_284] (rows=500/500 width=91) + Select Operator [SEL_267] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_283] (rows=500/500 width=91) + Filter Operator [FIL_266] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_282] (rows=500/500 width=91) + TableScan [TS_265] (rows=500/500 width=91) Output:["value"] - Reduce Output Operator [RS_239] - Select Operator [SEL_234] (rows=2899/820 width=178) + Reduce Output Operator [RS_240] + Select Operator [SEL_235] (rows=2899/5421 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_235] + Please refer to the previous Select Operator [SEL_232] + File Output Operator [FS_236] table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_240] - Select Operator [SEL_236] (rows=2899/820 width=178) + Please refer to the previous Select Operator [SEL_232] + Reduce Output Operator [RS_241] + Select Operator [SEL_237] (rows=2899/5421 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - File Output Operator [FS_237] + Please refer to the previous Select Operator [SEL_232] + File Output Operator [FS_238] table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_231] - Reduce Output Operator [RS_241] - Select Operator [SEL_238] (rows=2899/820 width=178) + Please refer to the previous Select Operator [SEL_232] + Reduce Output Operator [RS_242] + Select Operator [SEL_239] (rows=2899/5421 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_231] - <-Reducer 3 [CONTAINS] llap - File Output Operator [FS_209] + Please refer to the previous Select Operator [SEL_232] + <-Reducer 15 [CONTAINS] llap + File Output Operator [FS_246] table:{"name:":"default.a_n14"} - Select Operator [SEL_207] (rows=66/170 width=177) + Select Operator [SEL_244] (rows=193/820 width=175) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_206] (rows=66/170 width=177) - Conds:RS_17._col3=Union 14._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_200] (rows=39/37 width=266) - Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + Merge Join Operator [MERGEJOIN_243] (rows=193/820 width=175) + Conds:Union 22._col0=RS_73._col1(Inner),Output:["_col1","_col4"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_73] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_205] (rows=39/115 width=264) + Conds:RS_68._col0=RS_69._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_68] PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) + Select Operator [SEL_13] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_119] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_15] + Filter Operator [FIL_125] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_11] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 26 [SIMPLE_EDGE] llap + SHUFFLE [RS_69] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] llap - Reduce Output Operator [RS_246] + Select Operator [SEL_67] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_136] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_65] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 22 [SIMPLE_EDGE] + <-Map 21 [CONTAINS] llap + Reduce Output Operator [RS_274] PartitionCols:_col0 - Select Operator [SEL_244] (rows=25/25 width=89) + Select Operator [SEL_272] (rows=25/25 width=89) Output:["_col0"] - Filter Operator [FIL_243] (rows=25/25 width=89) + Filter Operator [FIL_271] (rows=25/25 width=89) predicate:value is not null - TableScan [TS_242] (rows=25/25 width=89) + TableScan [TS_270] (rows=25/25 width=89) Output:["value"] - <-Map 15 [CONTAINS] llap - Reduce Output Operator [RS_251] + <-Map 23 [CONTAINS] llap + Reduce Output Operator [RS_279] PartitionCols:_col0 - Select Operator [SEL_249] (rows=500/500 width=91) + Select Operator [SEL_277] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_248] (rows=500/500 width=91) + Filter Operator [FIL_276] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_247] (rows=500/500 width=91) + TableScan [TS_275] (rows=500/500 width=91) Output:["value"] - Reduce Output Operator [RS_215] - Select Operator [SEL_210] (rows=2899/170 width=178) + <-Map 24 [CONTAINS] llap + Reduce Output Operator [RS_284] + PartitionCols:_col0 + Select Operator [SEL_282] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_281] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_280] (rows=500/500 width=91) + Output:["value"] + <-Map 25 [CONTAINS] llap + Reduce Output Operator [RS_289] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_286] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_285] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_252] + Select Operator [SEL_247] (rows=2899/820 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_207] - File Output Operator [FS_211] + Please refer to the previous Select Operator [SEL_244] + File Output Operator [FS_248] table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_207] - Reduce Output Operator [RS_216] - Select Operator [SEL_212] (rows=2899/170 width=178) + Please refer to the previous Select Operator [SEL_244] + Reduce Output Operator [RS_253] + Select Operator [SEL_249] (rows=2899/820 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_207] - File Output Operator [FS_213] + Please refer to the previous Select Operator [SEL_244] + File Output Operator [FS_250] table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_207] - Reduce Output Operator [RS_217] - Select Operator [SEL_214] (rows=2899/170 width=178) + Please refer to the previous Select Operator [SEL_244] + Reduce Output Operator [RS_254] + Select Operator [SEL_251] (rows=2899/820 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_207] - <-Reducer 9 [CONTAINS] llap - File Output Operator [FS_221] + Please refer to the previous Select Operator [SEL_244] + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_217] table:{"name:":"default.a_n14"} - Select Operator [SEL_219] (rows=2640/5421 width=178) + Select Operator [SEL_215] (rows=66/170 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_218] (rows=2640/5421 width=178) - Conds:RS_41._col1=Union 18._col0(Inner),Output:["_col0","_col3"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_201] (rows=791/1028 width=269) - Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] + Merge Join Operator [MERGEJOIN_214] (rows=66/170 width=177) + Conds:Union 2._col0=RS_19._col3(Inner),Output:["_col2","_col3"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_203] (rows=39/37 width=266) + Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_23] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_0] - <-Map 16 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] + Please refer to the previous Select Operator [SEL_13] + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=178) + Select Operator [SEL_10] (rows=500/500 width=178) Output:["_col0","_col1"] Filter Operator [FIL_124] (rows=500/500 width=178) predicate:key is not null - TableScan [TS_24] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 18 [SIMPLE_EDGE] - <-Map 17 [CONTAINS] llap - Reduce Output Operator [RS_256] + Please refer to the previous TableScan [TS_8] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_213] PartitionCols:_col0 - Select Operator [SEL_254] (rows=25/25 width=89) + Select Operator [SEL_211] (rows=25/25 width=89) Output:["_col0"] - Filter Operator [FIL_253] (rows=25/25 width=89) + Filter Operator [FIL_210] (rows=25/25 width=89) predicate:value is not null - TableScan [TS_252] (rows=25/25 width=89) + TableScan [TS_209] (rows=25/25 width=89) Output:["value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_261] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_230] PartitionCols:_col0 - Select Operator [SEL_259] (rows=500/500 width=91) + Select Operator [SEL_228] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_258] (rows=500/500 width=91) + Filter Operator [FIL_227] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_257] (rows=500/500 width=91) + TableScan [TS_226] (rows=500/500 width=91) Output:["value"] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_266] - PartitionCols:_col0 - Select Operator [SEL_264] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_263] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_262] (rows=500/500 width=91) - Output:["value"] - Reduce Output Operator [RS_227] - Select Operator [SEL_222] (rows=2899/5421 width=178) + Reduce Output Operator [RS_223] + Select Operator [SEL_218] (rows=2899/170 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_223] + Please refer to the previous Select Operator [SEL_215] + File Output Operator [FS_219] table:{"name:":"default.b_n10"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_228] - Select Operator [SEL_224] (rows=2899/5421 width=178) + Please refer to the previous Select Operator [SEL_215] + Reduce Output Operator [RS_224] + Select Operator [SEL_220] (rows=2899/170 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] - File Output Operator [FS_225] + Please refer to the previous Select Operator [SEL_215] + File Output Operator [FS_221] table:{"name:":"default.c_n3"} - Please refer to the previous Select Operator [SEL_219] - Reduce Output Operator [RS_229] - Select Operator [SEL_226] (rows=2899/5421 width=178) + Please refer to the previous Select Operator [SEL_215] + Reduce Output Operator [RS_225] + Select Operator [SEL_222] (rows=2899/170 width=178) Output:["key","value"] - Please refer to the previous Select Operator [SEL_219] + Please refer to the previous Select Operator [SEL_215] Reducer 6 llap - File Output Operator [FS_89] - Group By Operator [GBY_87] (rows=1/1 width=880) + File Output Operator [FS_92] + Group By Operator [GBY_90] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Reducer 7 llap - File Output Operator [FS_97] - Group By Operator [GBY_95] (rows=1/1 width=880) + File Output Operator [FS_100] + Group By Operator [GBY_98] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Stage-6 @@ -1543,32 +1543,32 @@ POSTHOOK: Output: default@c_n3 Plan optimized by CBO. Vertex dependency in root stage -Map 16 <- Union 17 (CONTAINS) -Map 19 <- Union 17 (CONTAINS) -Map 21 <- Union 22 (CONTAINS) -Map 26 <- Union 22 (CONTAINS) -Map 27 <- Union 24 (CONTAINS) -Map 29 <- Union 30 (CONTAINS) -Map 36 <- Union 30 (CONTAINS) -Map 37 <- Union 32 (CONTAINS) -Map 38 <- Union 34 (CONTAINS) -Reducer 10 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 1 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 18 <- Union 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 25 <- Union 24 (SIMPLE_EDGE) -Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 31 <- Union 30 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 33 <- Union 32 (SIMPLE_EDGE), Union 34 (CONTAINS) -Reducer 35 <- Union 34 (SIMPLE_EDGE) -Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Union 2 (CONTAINS) +Map 12 <- Union 2 (CONTAINS) +Map 20 <- Union 21 (CONTAINS) +Map 25 <- Union 21 (CONTAINS) +Map 26 <- Union 23 (CONTAINS) +Map 28 <- Union 29 (CONTAINS) +Map 35 <- Union 29 (CONTAINS) +Map 36 <- Union 31 (CONTAINS) +Map 37 <- Union 33 (CONTAINS) +Reducer 10 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 24 <- Union 23 (SIMPLE_EDGE) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Union 31 (SIMPLE_EDGE), Union 33 (CONTAINS) +Reducer 34 <- Union 33 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -1579,253 +1579,253 @@ Stage-5 Dependency Collection{} Stage-3 Reducer 10 llap - File Output Operator [FS_137] - Group By Operator [GBY_135] (rows=1/1 width=880) + File Output Operator [FS_132] + Group By Operator [GBY_130] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_134] - Select Operator [SEL_133] (rows=2899/319 width=178) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_129] + Select Operator [SEL_128] (rows=2899/319 width=178) Output:["key","value"] - Group By Operator [GBY_112] (rows=2899/319 width=178) + Group By Operator [GBY_115] (rows=2899/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] llap - Reduce Output Operator [RS_260] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 19 [CONTAINS] llap + Reduce Output Operator [RS_273] PartitionCols:_col0, _col1 - Select Operator [SEL_258] (rows=193/304 width=175) + Select Operator [SEL_271] (rows=193/304 width=175) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_257] (rows=193/304 width=175) - Conds:RS_104._col1=RS_105._col0(Inner),Output:["_col0","_col3"] - <-Reducer 14 [SIMPLE_EDGE] llap - SHUFFLE [RS_104] + Merge Join Operator [MERGEJOIN_270] (rows=193/304 width=175) + Conds:RS_107._col0=RS_108._col1(Inner),Output:["_col1","_col4"] + <-Reducer 18 [SIMPLE_EDGE] llap + SHUFFLE [RS_108] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_242] (rows=39/115 width=264) - Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_101] + Merge Join Operator [MERGEJOIN_245] (rows=39/115 width=264) + Conds:RS_103._col0=RS_104._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 17 [SIMPLE_EDGE] llap + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25/25 width=175) + Select Operator [SEL_17] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=25/25 width=175) + Filter Operator [FIL_165] (rows=25/25 width=175) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25/25 width=175) + TableScan [TS_15] (rows=25/25 width=175) default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 28 [SIMPLE_EDGE] llap - SHUFFLE [RS_102] + <-Map 38 [SIMPLE_EDGE] llap + SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_70] (rows=25/25 width=175) + Select Operator [SEL_102] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_169] (rows=25/25 width=175) + Filter Operator [FIL_176] (rows=25/25 width=175) predicate:key is not null - TableScan [TS_68] (rows=25/25 width=175) + TableScan [TS_100] (rows=25/25 width=175) default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 35 [SIMPLE_EDGE] llap - SHUFFLE [RS_105] + <-Reducer 34 [SIMPLE_EDGE] llap + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_100] (rows=1525/319 width=91) + Select Operator [SEL_96] (rows=1525/319 width=91) Output:["_col0"] - Group By Operator [GBY_99] (rows=1525/319 width=178) + Group By Operator [GBY_95] (rows=1525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 34 [SIMPLE_EDGE] - <-Map 38 [CONTAINS] llap - Reduce Output Operator [RS_317] + <-Union 33 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] llap + Reduce Output Operator [RS_320] PartitionCols:_col1, _col0 - Select Operator [SEL_315] (rows=500/500 width=178) + Select Operator [SEL_318] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_314] (rows=500/500 width=178) + Filter Operator [FIL_317] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_313] (rows=500/500 width=178) + TableScan [TS_316] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 33 [CONTAINS] llap - Reduce Output Operator [RS_302] + <-Reducer 32 [CONTAINS] llap + Reduce Output Operator [RS_305] PartitionCols:_col1, _col0 - Select Operator [SEL_300] (rows=1025/319 width=178) + Select Operator [SEL_303] (rows=1025/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_299] (rows=1025/319 width=178) + Group By Operator [GBY_302] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 32 [SIMPLE_EDGE] - <-Map 37 [CONTAINS] llap - Reduce Output Operator [RS_312] + <-Union 31 [SIMPLE_EDGE] + <-Map 36 [CONTAINS] llap + Reduce Output Operator [RS_315] PartitionCols:_col1, _col0 - Select Operator [SEL_310] (rows=500/500 width=178) + Select Operator [SEL_313] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_309] (rows=500/500 width=178) + Filter Operator [FIL_312] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_308] (rows=500/500 width=178) + TableScan [TS_311] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 31 [CONTAINS] llap - Reduce Output Operator [RS_298] + <-Reducer 30 [CONTAINS] llap + Reduce Output Operator [RS_301] PartitionCols:_col1, _col0 - Select Operator [SEL_296] (rows=525/319 width=178) + Select Operator [SEL_299] (rows=525/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_295] (rows=525/319 width=178) + Group By Operator [GBY_298] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 30 [SIMPLE_EDGE] - <-Map 29 [CONTAINS] llap - Reduce Output Operator [RS_294] + <-Union 29 [SIMPLE_EDGE] + <-Map 28 [CONTAINS] llap + Reduce Output Operator [RS_297] PartitionCols:_col1, _col0 - Select Operator [SEL_292] (rows=25/25 width=175) + Select Operator [SEL_295] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=25/25 width=175) + Filter Operator [FIL_294] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_290] (rows=25/25 width=175) + TableScan [TS_293] (rows=25/25 width=175) Output:["key","value"] - <-Map 36 [CONTAINS] llap - Reduce Output Operator [RS_307] + <-Map 35 [CONTAINS] llap + Reduce Output Operator [RS_310] PartitionCols:_col1, _col0 - Select Operator [SEL_305] (rows=500/500 width=178) + Select Operator [SEL_308] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_304] (rows=500/500 width=178) + Filter Operator [FIL_307] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_303] (rows=500/500 width=178) + TableScan [TS_306] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 5 [CONTAINS] llap - Reduce Output Operator [RS_252] + <-Reducer 6 [CONTAINS] llap + Reduce Output Operator [RS_260] PartitionCols:_col0, _col1 - Group By Operator [GBY_250] (rows=2706/309 width=178) + Group By Operator [GBY_258] (rows=2706/309 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] llap - Reduce Output Operator [RS_256] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] llap + Reduce Output Operator [RS_269] PartitionCols:_col0, _col1 - Select Operator [SEL_254] (rows=2640/1056 width=178) + Select Operator [SEL_267] (rows=2640/1056 width=178) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_253] (rows=2640/1056 width=178) - Conds:RS_55._col1=RS_56._col0(Inner),Output:["_col0","_col3"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] + Merge Join Operator [MERGEJOIN_266] (rows=2640/1056 width=178) + Conds:RS_57._col0=RS_58._col1(Inner),Output:["_col1","_col4"] + <-Reducer 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_241] (rows=791/1028 width=269) - Conds:RS_52._col0=RS_53._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_52] + Merge Join Operator [MERGEJOIN_244] (rows=791/1028 width=269) + Conds:RS_53._col0=RS_54._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_53] PartitionCols:_col0 - Select Operator [SEL_27] (rows=500/500 width=178) + Select Operator [SEL_49] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=500/500 width=178) + Filter Operator [FIL_169] (rows=500/500 width=178) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=500/500 width=178) + TableScan [TS_12] (rows=500/500 width=178) default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 20 [SIMPLE_EDGE] llap - SHUFFLE [RS_53] + <-Map 27 [SIMPLE_EDGE] llap + SHUFFLE [RS_54] PartitionCols:_col0 - Select Operator [SEL_30] (rows=500/500 width=178) + Select Operator [SEL_52] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500/500 width=178) + Filter Operator [FIL_170] (rows=500/500 width=178) predicate:key is not null - TableScan [TS_28] (rows=500/500 width=178) + TableScan [TS_50] (rows=500/500 width=178) default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 25 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] + <-Reducer 24 [SIMPLE_EDGE] llap + SHUFFLE [RS_57] PartitionCols:_col0 - Select Operator [SEL_51] (rows=1025/319 width=91) + Select Operator [SEL_46] (rows=1025/319 width=91) Output:["_col0"] - Group By Operator [GBY_50] (rows=1025/319 width=178) + Group By Operator [GBY_45] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 27 [CONTAINS] llap - Reduce Output Operator [RS_289] + <-Union 23 [SIMPLE_EDGE] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_292] PartitionCols:_col1, _col0 - Select Operator [SEL_287] (rows=500/500 width=178) + Select Operator [SEL_290] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_286] (rows=500/500 width=178) + Filter Operator [FIL_289] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_285] (rows=500/500 width=178) + TableScan [TS_288] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 23 [CONTAINS] llap - Reduce Output Operator [RS_279] + <-Reducer 22 [CONTAINS] llap + Reduce Output Operator [RS_282] PartitionCols:_col1, _col0 - Select Operator [SEL_277] (rows=525/319 width=178) + Select Operator [SEL_280] (rows=525/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_276] (rows=525/319 width=178) + Group By Operator [GBY_279] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] llap - Reduce Output Operator [RS_275] + <-Union 21 [SIMPLE_EDGE] + <-Map 20 [CONTAINS] llap + Reduce Output Operator [RS_278] PartitionCols:_col1, _col0 - Select Operator [SEL_273] (rows=25/25 width=175) + Select Operator [SEL_276] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_272] (rows=25/25 width=175) + Filter Operator [FIL_275] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_271] (rows=25/25 width=175) + TableScan [TS_274] (rows=25/25 width=175) Output:["key","value"] - <-Map 26 [CONTAINS] llap - Reduce Output Operator [RS_284] + <-Map 25 [CONTAINS] llap + Reduce Output Operator [RS_287] PartitionCols:_col1, _col0 - Select Operator [SEL_282] (rows=500/500 width=178) + Select Operator [SEL_285] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_281] (rows=500/500 width=178) + Filter Operator [FIL_284] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_280] (rows=500/500 width=178) + TableScan [TS_283] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_249] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_257] PartitionCols:_col0, _col1 - Select Operator [SEL_247] (rows=66/61 width=177) + Select Operator [SEL_255] (rows=66/61 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_246] (rows=66/61 width=177) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col2"] - <-Reducer 18 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_254] (rows=66/61 width=177) + Conds:RS_22._col0=RS_23._col3(Inner),Output:["_col2","_col3"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_243] (rows=39/37 width=266) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500/500 width=178) + predicate:key is not null + Please refer to the previous TableScan [TS_12] + <-Map 17 [SIMPLE_EDGE] llap + SHUFFLE [RS_19] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_17] + <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_22] PartitionCols:_col0 - Select Operator [SEL_17] (rows=525/319 width=91) + Select Operator [SEL_11] (rows=525/319 width=91) Output:["_col0"] - Group By Operator [GBY_16] (rows=525/319 width=178) + Group By Operator [GBY_10] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] llap - Reduce Output Operator [RS_265] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_253] PartitionCols:_col1, _col0 - Select Operator [SEL_263] (rows=25/25 width=175) + Select Operator [SEL_251] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_262] (rows=25/25 width=175) + Filter Operator [FIL_250] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_261] (rows=25/25 width=175) + TableScan [TS_249] (rows=25/25 width=175) Output:["key","value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_270] + <-Map 12 [CONTAINS] llap + Reduce Output Operator [RS_265] PartitionCols:_col1, _col0 - Select Operator [SEL_268] (rows=500/500 width=178) + Select Operator [SEL_263] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_267] (rows=500/500 width=178) + Filter Operator [FIL_262] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_266] (rows=500/500 width=178) + TableScan [TS_261] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_240] (rows=39/37 width=266) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500/500 width=178) - predicate:key is not null - Please refer to the previous TableScan [TS_0] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_5] - Reducer 8 llap - File Output Operator [FS_121] - Group By Operator [GBY_119] (rows=1/1 width=880) + Reducer 11 llap + File Output Operator [FS_140] + Group By Operator [GBY_138] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_118] - Select Operator [SEL_117] (rows=2899/319 width=178) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_137] + Select Operator [SEL_136] (rows=2899/319 width=178) Output:["key","value"] - Please refer to the previous Group By Operator [GBY_112] + Please refer to the previous Group By Operator [GBY_115] Reducer 9 llap - File Output Operator [FS_129] - Group By Operator [GBY_127] (rows=1/1 width=880) + File Output Operator [FS_124] + Group By Operator [GBY_122] (rows=1/1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=2899/319 width=178) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_121] + Select Operator [SEL_120] (rows=2899/319 width=178) Output:["key","value"] - Please refer to the previous Group By Operator [GBY_112] + Please refer to the previous Group By Operator [GBY_115] Stage-6 Stats Work{} Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 7f0ce5a9c7..cd2766fbb1 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -434,11 +434,11 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 @@ -446,70 +446,72 @@ Stage-0 limit:-1 Stage-1 Reducer 5 llap - File Output Operator [FS_31] - Select Operator [SEL_29] (rows=1 width=20) + File Output Operator [FS_32] + Select Operator [SEL_30] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Select Operator [SEL_27] (rows=1 width=28) + SHUFFLE [RS_29] + Select Operator [SEL_28] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1 - Select Operator [SEL_23] (rows=1 width=20) - Output:["_col1","_col5"] - Merge Join Operator [MERGEJOIN_64] (rows=1 width=20) - Conds:RS_20._col3=RS_21._col0(Inner),Output:["_col1","_col4","_col5","_col7"],residual filter predicates:{((_col4 + _col7) >= 0)} + Group By Operator [GBY_25] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col7, _col3 + Select Operator [SEL_24] (rows=1 width=20) + Output:["_col3","_col7"] + Merge Join Operator [MERGEJOIN_65] (rows=1 width=20) + Conds:RS_21._col0=RS_22._col3(Inner),Output:["_col1","_col3","_col6","_col7"],residual filter predicates:{((_col6 + _col1) >= 0)} <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_63] (rows=5 width=104) - Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_37] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=2 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_38] (rows=5 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_16] (rows=2 width=89) + Select Operator [SEL_6] (rows=2 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=2 width=93) + Group By Operator [GBY_5] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=2 width=93) + Group By Operator [GBY_3] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=5 width=93) + Filter Operator [FIL_38] (rows=5 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col3 + Select Operator [SEL_20] (rows=5 width=104) + Output:["_col1","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_64] (rows=5 width=104) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_39] (rows=18 width=84) + predicate:key is not null + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=2 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_40] (rows=5 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc PREHOOK: type: QUERY @@ -530,11 +532,11 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 @@ -542,70 +544,72 @@ Stage-0 limit:-1 Stage-1 Reducer 5 llap - File Output Operator [FS_31] - Select Operator [SEL_29] (rows=1 width=20) + File Output Operator [FS_32] + Select Operator [SEL_30] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Select Operator [SEL_27] (rows=1 width=28) + SHUFFLE [RS_29] + Select Operator [SEL_28] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 - Select Operator [SEL_23] (rows=1 width=20) - Output:["_col1","_col7"] - Merge Join Operator [MERGEJOIN_64] (rows=1 width=20) - Conds:RS_20._col5=RS_21._col0(Inner),Output:["_col1","_col6","_col7","_col9"],residual filter predicates:{((_col6 + _col9) >= 0)} + Group By Operator [GBY_25] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col9 + Select Operator [SEL_24] (rows=1 width=20) + Output:["_col3","_col9"] + Merge Join Operator [MERGEJOIN_65] (rows=1 width=20) + Conds:RS_21._col0=RS_22._col5(Inner),Output:["_col1","_col3","_col8","_col9"],residual filter predicates:{((_col8 + _col1) >= 0)} <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_63] (rows=1 width=117) - Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0L)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=99) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_37] (rows=18 width=84) - predicate:((c_int > 0) and key is not null) - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_38] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=2 width=93) + Filter Operator [FIL_38] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col5 + Select Operator [SEL_20] (rows=1 width=117) + Output:["_col1","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_64] (rows=1 width=117) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0L)} + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=99) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_39] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_40] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -626,10 +630,10 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 @@ -637,64 +641,66 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=20) + File Output Operator [FS_29] + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 - Select Operator [SEL_23] (rows=1 width=20) - Output:["_col1","_col5"] - Merge Join Operator [MERGEJOIN_61] (rows=1 width=20) - Conds:RS_20._col3=RS_21._col0(Inner),Output:["_col1","_col4","_col5","_col7"],residual filter predicates:{((_col4 + _col7) >= 2)} + Group By Operator [GBY_25] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col7 + Select Operator [SEL_24] (rows=1 width=20) + Output:["_col3","_col7"] + Merge Join Operator [MERGEJOIN_62] (rows=1 width=20) + Conds:RS_21._col0=RS_22._col3(Inner),Output:["_col1","_col3","_col6","_col7"],residual filter predicates:{((_col6 + _col1) >= 2)} <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_60] (rows=2 width=105) - Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_35] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col3 + Select Operator [SEL_20] (rows=2 width=105) + Output:["_col1","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_61] (rows=2 width=105) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_36] (rows=18 width=84) + predicate:key is not null + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_37] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int PREHOOK: type: QUERY @@ -715,11 +721,11 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 @@ -727,68 +733,70 @@ Stage-0 limit:-1 Stage-1 Reducer 5 llap - File Output Operator [FS_30] - Select Operator [SEL_29] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_30] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - Group By Operator [GBY_26] (rows=1 width=20) + SHUFFLE [RS_29] + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 - Select Operator [SEL_23] (rows=1 width=20) - Output:["_col1","_col7"] - Merge Join Operator [MERGEJOIN_63] (rows=1 width=20) - Conds:RS_20._col5=RS_21._col0(Inner),Output:["_col1","_col6","_col7","_col9"],residual filter predicates:{((_col6 + _col9) >= 0)} + Group By Operator [GBY_25] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col9 + Select Operator [SEL_24] (rows=1 width=20) + Output:["_col3","_col9"] + Merge Join Operator [MERGEJOIN_64] (rows=1 width=20) + Conds:RS_21._col0=RS_22._col5(Inner),Output:["_col1","_col3","_col8","_col9"],residual filter predicates:{((_col8 + _col1) >= 0)} <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_62] (rows=1 width=117) - Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0L)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=99) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_36] (rows=18 width=84) - predicate:((c_int > 0) and key is not null) - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_37] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_38] (rows=2 width=93) + Filter Operator [FIL_37] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col5 + Select Operator [SEL_20] (rows=1 width=117) + Output:["_col1","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_63] (rows=1 width=117) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0L)} + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=99) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_38] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -809,10 +817,10 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 @@ -820,64 +828,66 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=20) + File Output Operator [FS_29] + Group By Operator [GBY_27] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_26] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 - Select Operator [SEL_23] (rows=1 width=20) - Output:["_col1","_col5"] - Merge Join Operator [MERGEJOIN_61] (rows=1 width=20) - Conds:RS_20._col3=RS_21._col0(Inner),Output:["_col1","_col4","_col5","_col7"],residual filter predicates:{((_col4 + _col7) >= 0)} + Group By Operator [GBY_25] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col7 + Select Operator [SEL_24] (rows=1 width=20) + Output:["_col3","_col7"] + Merge Join Operator [MERGEJOIN_62] (rows=1 width=20) + Conds:RS_21._col0=RS_22._col3(Inner),Output:["_col1","_col3","_col6","_col7"],residual filter predicates:{((_col6 + _col1) >= 0)} <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_60] (rows=2 width=105) - Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_34] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_14] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_35] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col3 + Select Operator [SEL_20] (rows=2 width=105) + Output:["_col1","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_61] (rows=2 width=105) + Conds:RS_17._col0=RS_18._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col4 > 0) or _col2)} + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_36] (rows=18 width=84) + predicate:key is not null + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_37] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc PREHOOK: type: QUERY @@ -1342,50 +1352,50 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_16] - Select Operator [SEL_15] (rows=24 width=100) + Reducer 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=24 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_49] (rows=24 width=104) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"],residual filter predicates:{((_col1 > 0) or _col7)} {((_col1 > 0) or (_col6 >= 0))} - <-Map 5 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_50] (rows=24 width=104) + Conds:RS_13._col0=RS_14._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 > 0) or (_col1 >= 0))} + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=18 width=88) + Select Operator [SEL_2] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=18 width=84) + Filter Operator [FIL_23] (rows=18 width=84) predicate:key is not null - TableScan [TS_6] (rows=20 width=84) + TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_48] (rows=8 width=182) + Merge Join Operator [MERGEJOIN_49] (rows=8 width=182) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"],residual filter predicates:{((_col1 + _col4) = 2)} - <-Map 1 [SIMPLE_EDGE] llap + <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_5] (rows=9 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_22] (rows=9 width=93) + Filter Operator [FIL_24] (rows=9 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and key is not null and ((c_int + 1) = 2)) - TableScan [TS_0] (rows=20 width=88) + TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [SIMPLE_EDGE] llap + <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) + Select Operator [SEL_8] (rows=9 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=9 width=93) + Filter Operator [FIL_25] (rows=9 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) = 2) and key is not null) - TableScan [TS_3] (rows=20 width=88) + TableScan [TS_6] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) @@ -1407,50 +1417,50 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_16] - Select Operator [SEL_15] (rows=24 width=100) + Reducer 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=24 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_49] (rows=24 width=104) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"],residual filter predicates:{((_col1 > 0) or _col7)} {((_col1 > 0) or (_col6 >= 0))} - <-Map 5 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_50] (rows=24 width=104) + Conds:RS_13._col0=RS_14._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 > 0) or (_col1 >= 0))} + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=18 width=88) + Select Operator [SEL_2] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=18 width=84) + Filter Operator [FIL_23] (rows=18 width=84) predicate:key is not null - TableScan [TS_6] (rows=20 width=84) + TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_48] (rows=8 width=182) + Merge Join Operator [MERGEJOIN_49] (rows=8 width=182) Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"],residual filter predicates:{((_col1 + _col4) = 2)} - <-Map 1 [SIMPLE_EDGE] llap + <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_5] (rows=9 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_22] (rows=9 width=93) + Filter Operator [FIL_24] (rows=9 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and key is not null and ((c_int + 1) = 2)) - TableScan [TS_0] (rows=20 width=88) + TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [SIMPLE_EDGE] llap + <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) + Select Operator [SEL_8] (rows=9 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_23] (rows=9 width=93) + Filter Operator [FIL_25] (rows=9 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) = 2) and key is not null) - TableScan [TS_3] (rows=20 width=88) + TableScan [TS_6] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order by x limit 1 @@ -1629,110 +1639,112 @@ POSTHOOK: Input: default@cbo_t3 Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 llap - File Output Operator [FS_44] - Limit [LIM_42] (rows=1 width=24) + Reducer 4 llap + File Output Operator [FS_45] + Limit [LIM_43] (rows=1 width=24) Number of rows:5 - Select Operator [SEL_41] (rows=1 width=24) + Select Operator [SEL_42] (rows=1 width=24) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_40] - Select Operator [SEL_39] (rows=1 width=24) + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] + Select Operator [SEL_40] (rows=1 width=24) Output:["_col0","_col1","_col2","_col3"] - Top N Key Operator [TNK_55] (rows=1 width=20) + Top N Key Operator [TNK_56] (rows=1 width=20) keys:(UDFToLong(_col1) + _col0), _col0,top n:5 - Group By Operator [GBY_38] (rows=1 width=20) + Group By Operator [GBY_39] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_37] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] PartitionCols:_col0, _col1 - Group By Operator [GBY_36] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col7 - Select Operator [SEL_35] (rows=2 width=20) - Output:["_col4","_col7"] - Merge Join Operator [MERGEJOIN_79] (rows=2 width=20) - Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col4","_col5","_col7","_col8"],residual filter predicates:{(_col5 or _col8)} - <-Map 11 [SIMPLE_EDGE] llap + Group By Operator [GBY_37] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col7, _col1 + Select Operator [SEL_36] (rows=2 width=20) + Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_80] (rows=2 width=20) + Conds:RS_33._col0=RS_34._col2(Inner),Output:["_col1","_col2","_col7","_col8"],residual filter predicates:{(_col8 or _col2)} + <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_33] PartitionCols:_col0 - Select Operator [SEL_28] (rows=18 width=88) + Select Operator [SEL_2] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_54] (rows=18 width=84) + Filter Operator [FIL_51] (rows=18 width=84) predicate:key is not null - TableScan [TS_26] (rows=20 width=84) + TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_34] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_78] (rows=1 width=105) - Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col3 + _col1) >= 0)} - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] - PartitionCols:_col0 - Select Operator [SEL_25] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_24] (rows=2 width=97) + Select Operator [SEL_32] (rows=1 width=105) + Output:["_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_79] (rows=1 width=105) + Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col3 + _col1) >= 0)} + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_27] (rows=2 width=97) + predicate:_col0 is not null + Limit [LIM_25] (rows=3 width=97) + Number of rows:5 + Select Operator [SEL_24] (rows=3 width=97) + Output:["_col0","_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + Select Operator [SEL_22] (rows=3 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_21] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_19] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Top N Key Operator [TNK_63] (rows=6 width=93) + keys:key, c_int, c_float,top n:5 + Filter Operator [FIL_55] (rows=6 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) + TableScan [TS_16] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_29] + PartitionCols:_col0 + Filter Operator [FIL_14] (rows=1 width=105) predicate:_col0 is not null - Limit [LIM_22] (rows=3 width=97) + Limit [LIM_12] (rows=3 width=76) Number of rows:5 - Select Operator [SEL_21] (rows=3 width=97) - Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - Select Operator [SEL_19] (rows=3 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_18] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Top N Key Operator [TNK_62] (rows=6 width=93) - keys:key, c_int, c_float,top n:5 + Select Operator [SEL_11] (rows=3 width=76) + Output:["_col0","_col1"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_10] + Select Operator [SEL_9] (rows=3 width=76) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_57] (rows=3 width=101) + keys:(UDFToDouble((_col1 + 1)) / 10.0D), _col3,top n:5 + Group By Operator [GBY_8] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float Filter Operator [FIL_53] (rows=6 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) - TableScan [TS_13] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] - PartitionCols:_col0 - Filter Operator [FIL_11] (rows=1 width=105) - predicate:_col0 is not null - Limit [LIM_9] (rows=3 width=76) - Number of rows:5 - Select Operator [SEL_8] (rows=3 width=76) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - Select Operator [SEL_6] (rows=3 width=76) - Output:["_col0","_col1","_col2","_col3"] - Top N Key Operator [TNK_56] (rows=3 width=101) - keys:(UDFToDouble((_col1 + 1)) / 10.0D), _col3,top n:5 - Group By Operator [GBY_5] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_51] (rows=6 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + TableScan [TS_3] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) PREHOOK: type: QUERY @@ -4644,42 +4656,44 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Map 1 llap - File Output Operator [FS_16] - Map Join Operator [MAPJOIN_46] (rows=805 width=10) - Conds:MAPJOIN_45._col1=RS_13._col0(Inner),Output:["_col0"] - <-Map 3 [BROADCAST_EDGE] llap - BROADCAST [RS_13] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=166 width=10) - Output:["_col0"] - Filter Operator [FIL_24] (rows=166 width=10) - predicate:(value > 'val_450') - TableScan [TS_6] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] - <-Map Join Operator [MAPJOIN_45] (rows=732 width=10) - Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col0","_col1"] - <-Map 2 [BROADCAST_EDGE] llap - BROADCAST [RS_10] + Map 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=805 width=10) + Output:["_col0"] + Map Join Operator [MAPJOIN_47] (rows=805 width=10) + Conds:RS_13._col0=MAPJOIN_46._col1(Inner),Output:["_col1"] + <-Map 1 [BROADCAST_EDGE] llap + BROADCAST [RS_13] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25 width=7) + Select Operator [SEL_2] (rows=166 width=10) Output:["_col0"] - Filter Operator [FIL_23] (rows=25 width=7) - predicate:key is not null - TableScan [TS_3] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=666 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=666 width=10) - predicate:((value > 'val_450') and key is not null) - TableScan [TS_0] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Filter Operator [FIL_23] (rows=166 width=10) + predicate:(value > 'val_450') + TableScan [TS_0] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] + <-Map Join Operator [MAPJOIN_46] (rows=732 width=10) + Conds:SEL_5._col0=RS_10._col0(Inner),Output:["_col0","_col1"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_10] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_25] (rows=25 width=7) + predicate:key is not null + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_5] (rows=666 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_24] (rows=666 width=10) + predicate:((value > 'val_450') and key is not null) + TableScan [TS_3] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' PREHOOK: type: QUERY @@ -4704,42 +4718,44 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Map 1 llap - File Output Operator [FS_16] - Map Join Operator [MAPJOIN_46] (rows=805 width=10) - Conds:MAPJOIN_45._col1=RS_13._col0(Inner),Output:["_col0"] - <-Map 3 [BROADCAST_EDGE] llap - BROADCAST [RS_13] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=166 width=10) - Output:["_col0"] - Filter Operator [FIL_24] (rows=166 width=10) - predicate:(value > 'val_450') - TableScan [TS_6] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] - <-Map Join Operator [MAPJOIN_45] (rows=732 width=10) - Conds:SEL_2._col0=RS_10._col0(Inner),Output:["_col0","_col1"] - <-Map 2 [BROADCAST_EDGE] llap - BROADCAST [RS_10] + Map 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=805 width=10) + Output:["_col0"] + Map Join Operator [MAPJOIN_47] (rows=805 width=10) + Conds:RS_13._col0=MAPJOIN_46._col1(Inner),Output:["_col1"] + <-Map 1 [BROADCAST_EDGE] llap + BROADCAST [RS_13] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25 width=7) + Select Operator [SEL_2] (rows=166 width=10) Output:["_col0"] - Filter Operator [FIL_23] (rows=25 width=7) - predicate:key is not null - TableScan [TS_3] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=666 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=666 width=10) - predicate:((value > 'val_450') and key is not null) - TableScan [TS_0] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Filter Operator [FIL_23] (rows=166 width=10) + predicate:(value > 'val_450') + TableScan [TS_0] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] + <-Map Join Operator [MAPJOIN_46] (rows=732 width=10) + Conds:SEL_5._col0=RS_10._col0(Inner),Output:["_col0","_col1"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_10] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_25] (rows=25 width=7) + predicate:key is not null + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_5] (rows=666 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_24] (rows=666 width=10) + predicate:((value > 'val_450') and key is not null) + TableScan [TS_3] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain select p_mfgr, p_name, p_size, diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 6f275c6ecf..12b0e1f19c 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -193,50 +193,50 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_16] - Select Operator [SEL_15] (rows=63 width=268) + Reducer 2 llap + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=63 width=268) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_46] (rows=63 width=268) - Conds:RS_12._col3=RS_55._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 5 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_55] + Merge Join Operator [MERGEJOIN_47] (rows=63 width=268) + Conds:RS_50._col0=RS_14._col3(Inner),Output:["_col0","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_50] PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=91) + Select Operator [SEL_49] (rows=500 width=91) Output:["_col0"] - Filter Operator [FIL_53] (rows=500 width=91) + Filter Operator [FIL_48] (rows=500 width=91) predicate:value is not null - TableScan [TS_6] (rows=500 width=91) + TableScan [TS_0] (rows=500 width=91) default@srcpart,z,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_45] (rows=39 width=266) - Conds:RS_49._col0=RS_52._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_49] + Merge Join Operator [MERGEJOIN_46] (rows=39 width=266) + Conds:RS_53._col0=RS_56._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_53] PartitionCols:_col0 - Select Operator [SEL_48] (rows=500 width=178) + Select Operator [SEL_52] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_47] (rows=500 width=178) + Filter Operator [FIL_51] (rows=500 width=178) predicate:key is not null - TableScan [TS_0] (rows=500 width=178) + TableScan [TS_3] (rows=500 width=178) default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_52] + <-Map 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25 width=175) + Select Operator [SEL_55] (rows=25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_50] (rows=25 width=175) + Filter Operator [FIL_54] (rows=25 width=175) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=175) + TableScan [TS_6] (rows=25 width=175) default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: EXPLAIN @@ -326,132 +326,130 @@ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 7 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 8 <- Map 13 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_222] - Limit [LIM_221] (rows=2 width=285) + File Output Operator [FS_224] + Limit [LIM_223] (rows=2 width=285) Number of rows:100 - Select Operator [SEL_220] (rows=2 width=285) + Select Operator [SEL_222] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_219] - Group By Operator [GBY_218] (rows=2 width=285) + SHUFFLE [RS_221] + Group By Operator [GBY_220] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_49] + SHUFFLE [RS_51] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=2 width=285) + Group By Operator [GBY_50] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col8)","count(_col15)","count(_col3)"],keys:_col7, _col14, _col2 - Top N Key Operator [TNK_94] (rows=28 width=534) + Top N Key Operator [TNK_96] (rows=28 width=534) keys:_col7, _col14, _col2,top n:100 - Merge Join Operator [MERGEJOIN_193] (rows=28 width=534) - Conds:RS_44._col1, _col3=RS_45._col10, _col12(Inner),Output:["_col2","_col3","_col7","_col8","_col14","_col15"] + Merge Join Operator [MERGEJOIN_195] (rows=28 width=534) + Conds:RS_46._col1, _col3=RS_47._col10, _col12(Inner),Output:["_col2","_col3","_col7","_col8","_col14","_col15"] <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_45] + SHUFFLE [RS_47] PartitionCols:_col10, _col12 - Select Operator [SEL_40] (rows=2 width=447) - Output:["_col2","_col3","_col9","_col10","_col12"] - Merge Join Operator [MERGEJOIN_192] (rows=2 width=447) - Conds:RS_37._col2, _col4=RS_38._col1, _col3(Inner),Output:["_col0","_col1","_col9","_col10","_col12"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_191] (rows=5 width=356) - Conds:RS_217._col0=RS_205._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_205] - PartitionCols:_col0 - Select Operator [SEL_202] (rows=5 width=87) - Output:["_col0"] - Filter Operator [FIL_199] (rows=5 width=178) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=178) - default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_217] - PartitionCols:_col0 - Select Operator [SEL_216] (rows=7 width=443) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_215] (rows=7 width=534) - predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_18] (rows=85 width=534) - default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_37] - PartitionCols:_col2, _col4 - Merge Join Operator [MERGEJOIN_190] (rows=2 width=352) - Conds:RS_34._col1=RS_214._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 14 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_214] - PartitionCols:_col0 - Select Operator [SEL_213] (rows=2 width=89) - Output:["_col0"] - Filter Operator [FIL_212] (rows=2 width=175) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=175) - default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_189] (rows=2 width=352) - Conds:RS_31._col3=RS_211._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_211] - PartitionCols:_col0 - Select Operator [SEL_210] (rows=6 width=91) - Output:["_col0"] - Filter Operator [FIL_209] (rows=6 width=178) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_12] (rows=2000 width=178) - default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_188] (rows=2 width=443) - Conds:RS_208._col0=RS_204._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_204] - PartitionCols:_col0 - Select Operator [SEL_201] (rows=2 width=87) - Output:["_col0"] - Filter Operator [FIL_198] (rows=2 width=178) - predicate:((value = 'd1value') and key is not null) - Please refer to the previous TableScan [TS_3] - <-Map 12 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_208] - PartitionCols:_col0 - Select Operator [SEL_207] (rows=7 width=443) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_206] (rows=7 width=534) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_6] (rows=85 width=534) - default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + Merge Join Operator [MERGEJOIN_194] (rows=2 width=447) + Conds:RS_39._col4, _col6=RS_40._col1, _col3(Inner),Output:["_col2","_col3","_col9","_col10","_col12"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_40] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_193] (rows=5 width=356) + Conds:RS_219._col0=RS_207._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_207] + PartitionCols:_col0 + Select Operator [SEL_204] (rows=5 width=87) + Output:["_col0"] + Filter Operator [FIL_201] (rows=5 width=178) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=178) + default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_219] + PartitionCols:_col0 + Select Operator [SEL_218] (rows=7 width=443) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_217] (rows=7 width=534) + predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_26] (rows=85 width=534) + default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col4, _col6 + Merge Join Operator [MERGEJOIN_192] (rows=2 width=352) + Conds:RS_216._col0=RS_37._col2(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map 12 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_216] + PartitionCols:_col0 + Select Operator [SEL_215] (rows=2 width=89) + Output:["_col0"] + Filter Operator [FIL_214] (rows=2 width=175) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_6] (rows=25 width=175) + default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_37] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_191] (rows=2 width=352) + Conds:RS_213._col0=RS_23._col3(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_212] (rows=6 width=91) + Output:["_col0"] + Filter Operator [FIL_211] (rows=6 width=178) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_9] (rows=2000 width=178) + default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_190] (rows=2 width=443) + Conds:RS_210._col0=RS_206._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_206] + PartitionCols:_col0 + Select Operator [SEL_203] (rows=2 width=87) + Output:["_col0"] + Filter Operator [FIL_200] (rows=2 width=178) + predicate:((value = 'd1value') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 14 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_210] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=7 width=443) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_208] (rows=7 width=534) + predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_12] (rows=85 width=534) + default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_44] + SHUFFLE [RS_46] PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_187] (rows=70 width=269) - Conds:RS_196._col0=RS_203._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_189] (rows=70 width=269) + Conds:RS_198._col0=RS_205._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_203] + SHUFFLE [RS_205] PartitionCols:_col0 - Select Operator [SEL_200] (rows=5 width=87) + Select Operator [SEL_202] (rows=5 width=87) Output:["_col0"] - Filter Operator [FIL_197] (rows=5 width=178) + Filter Operator [FIL_199] (rows=5 width=178) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_196] + SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_195] (rows=170 width=356) + Select Operator [SEL_197] (rows=170 width=356) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_194] (rows=170 width=356) + Filter Operator [FIL_196] (rows=170 width=356) predicate:(v2 is not null and v3 is not null and k1 is not null) TableScan [TS_0] (rows=170 width=356) default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] @@ -483,141 +481,141 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Union 10 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) Map 13 <- Union 14 (CONTAINS) Map 16 <- Union 14 (CONTAINS) -Map 9 <- Union 10 (CONTAINS) -Reducer 11 <- Union 10 (SIMPLE_EDGE) +Map 7 <- Union 2 (CONTAINS) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 15 <- Union 14 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE) -Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 7 <- Reducer 15 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 vectorized, llap - File Output Operator [FS_172] - Group By Operator [GBY_171] (rows=39 width=268) + Reducer 6 vectorized, llap + File Output Operator [FS_178] + Group By Operator [GBY_177] (rows=39 width=268) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_130] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] llap + Reduce Output Operator [RS_149] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_129] (rows=39 width=268) + Group By Operator [GBY_148] (rows=39 width=268) Output:["_col0","_col1","_col2"],keys:_col0, _col1, _col2 - Select Operator [SEL_127] (rows=39 width=268) + Select Operator [SEL_146] (rows=39 width=268) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_126] (rows=39 width=268) - Conds:RS_22._col3=RS_170._col0(Inner),Output:["_col1","_col2","_col4"] - <-Reducer 11 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_170] + Merge Join Operator [MERGEJOIN_145] (rows=39 width=268) + Conds:RS_185._col0=RS_51._col3(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_51] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_125] (rows=39 width=266) + Conds:RS_172._col0=RS_176._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_176] + PartitionCols:_col0 + Select Operator [SEL_174] (rows=25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_173] (rows=25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_16] (rows=25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 8 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_172] + PartitionCols:_col0 + Select Operator [SEL_170] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_169] (rows=500 width=178) + predicate:key is not null + TableScan [TS_13] (rows=500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_185] PartitionCols:_col0 - Select Operator [SEL_169] (rows=262 width=91) + Select Operator [SEL_184] (rows=262 width=91) Output:["_col0"] - Group By Operator [GBY_168] (rows=262 width=178) + Group By Operator [GBY_183] (rows=262 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 10 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_183] + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_189] PartitionCols:_col0, _col1 - Group By Operator [GBY_182] (rows=262 width=178) + Group By Operator [GBY_188] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_181] (rows=500 width=178) + Select Operator [SEL_187] (rows=25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_180] (rows=500 width=178) + Filter Operator [FIL_186] (rows=25 width=175) predicate:value is not null - TableScan [TS_142] (rows=500 width=178) + TableScan [TS_150] (rows=25 width=175) Output:["key","value"] - <-Map 9 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_179] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_193] PartitionCols:_col0, _col1 - Group By Operator [GBY_178] (rows=262 width=178) + Group By Operator [GBY_192] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_177] (rows=25 width=175) + Select Operator [SEL_191] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_176] (rows=25 width=175) + Filter Operator [FIL_190] (rows=500 width=178) predicate:value is not null - TableScan [TS_136] (rows=25 width=175) + TableScan [TS_156] (rows=500 width=178) Output:["key","value"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_122] (rows=39 width=266) - Conds:RS_162._col0=RS_166._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_162] - PartitionCols:_col0 - Select Operator [SEL_161] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 8 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_166] - PartitionCols:_col0 - Select Operator [SEL_165] (rows=25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [CONTAINS] llap - Reduce Output Operator [RS_135] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_138] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_134] (rows=39 width=268) + Group By Operator [GBY_137] (rows=39 width=268) Output:["_col0","_col1","_col2"],keys:_col0, _col1, _col2 - Select Operator [SEL_132] (rows=39 width=268) + Select Operator [SEL_135] (rows=39 width=268) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_131] (rows=39 width=268) - Conds:RS_48._col3=RS_175._col0(Inner),Output:["_col1","_col2","_col4"] - <-Reducer 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_175] + Merge Join Operator [MERGEJOIN_134] (rows=39 width=268) + Conds:RS_168._col0=RS_24._col3(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_174] (rows=262 width=91) + Select Operator [SEL_167] (rows=262 width=91) Output:["_col0"] - Group By Operator [GBY_173] (rows=262 width=178) + Group By Operator [GBY_166] (rows=262 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_187] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_165] PartitionCols:_col0, _col1 - Group By Operator [GBY_186] (rows=262 width=178) + Group By Operator [GBY_164] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_185] (rows=25 width=175) + Select Operator [SEL_163] (rows=25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_184] (rows=25 width=175) + Filter Operator [FIL_162] (rows=25 width=175) predicate:value is not null - TableScan [TS_148] (rows=25 width=175) + TableScan [TS_128] (rows=25 width=175) Output:["key","value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_191] + <-Map 7 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_182] PartitionCols:_col0, _col1 - Group By Operator [GBY_190] (rows=262 width=178) + Group By Operator [GBY_181] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_189] (rows=500 width=178) + Select Operator [SEL_180] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_188] (rows=500 width=178) + Filter Operator [FIL_179] (rows=500 width=178) predicate:value is not null - TableScan [TS_154] (rows=500 width=178) + TableScan [TS_139] (rows=500 width=178) Output:["key","value"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_48] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_123] (rows=39 width=266) - Conds:RS_163._col0=RS_167._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_163] + Merge Join Operator [MERGEJOIN_124] (rows=39 width=266) + Conds:RS_171._col0=RS_175._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_175] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_161] + Please refer to the previous Select Operator [SEL_174] <-Map 8 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_167] + SHUFFLE [RS_171] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_165] + Please refer to the previous Select Operator [SEL_170] PREHOOK: query: explain SELECT x.key, y.value @@ -654,8 +652,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 12 <- Union 13 (CONTAINS) -Map 15 <- Union 13 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) Map 16 <- Union 17 (CONTAINS) Map 21 <- Union 17 (CONTAINS) Map 22 <- Union 19 (CONTAINS) @@ -663,257 +660,258 @@ Map 23 <- Union 24 (CONTAINS) Map 30 <- Union 24 (CONTAINS) Map 31 <- Union 26 (CONTAINS) Map 32 <- Union 28 (CONTAINS) -Reducer 10 <- Reducer 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 14 <- Union 13 (SIMPLE_EDGE) +Map 9 <- Union 2 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 13 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) Reducer 20 <- Union 19 (SIMPLE_EDGE) Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) Reducer 29 <- Union 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized, llap - File Output Operator [FS_334] - Group By Operator [GBY_333] (rows=49 width=177) + Reducer 8 vectorized, llap + File Output Operator [FS_341] + Group By Operator [GBY_340] (rows=49 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 5 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_332] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + Reduce Output Operator [RS_259] PartitionCols:_col0, _col1 - Group By Operator [GBY_331] (rows=49 width=177) + Group By Operator [GBY_258] (rows=49 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_330] (rows=43 width=177) + Select Operator [SEL_256] (rows=55 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_255] (rows=55 width=177) + Conds:RS_348._col0=RS_115._col3(Inner),Output:["_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_115] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_228] (rows=39 width=266) + Conds:RS_331._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_331] + PartitionCols:_col0 + Select Operator [SEL_330] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_329] (rows=500 width=178) + predicate:key is not null + TableScan [TS_13] (rows=500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_335] + PartitionCols:_col0 + Select Operator [SEL_334] (rows=25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_333] (rows=25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_16] (rows=25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 29 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_348] + PartitionCols:_col0 + Select Operator [SEL_347] (rows=440 width=91) + Output:["_col0"] + Group By Operator [GBY_346] (rows=440 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 28 [SIMPLE_EDGE] + <-Map 32 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_391] + PartitionCols:_col0, _col1 + Group By Operator [GBY_390] (rows=440 width=178) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_389] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_388] (rows=500 width=178) + predicate:value is not null + TableScan [TS_316] (rows=500 width=178) + Output:["key","value"] + <-Reducer 27 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_379] + PartitionCols:_col0, _col1 + Group By Operator [GBY_378] (rows=440 width=178) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_377] (rows=381 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_376] (rows=381 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 26 [SIMPLE_EDGE] + <-Map 31 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_387] + PartitionCols:_col0, _col1 + Group By Operator [GBY_386] (rows=381 width=178) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_385] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_384] (rows=500 width=178) + predicate:value is not null + TableScan [TS_310] (rows=500 width=178) + Output:["key","value"] + <-Reducer 25 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_375] + PartitionCols:_col0, _col1 + Group By Operator [GBY_374] (rows=381 width=178) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_373] (rows=262 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_372] (rows=262 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 24 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_371] + PartitionCols:_col0, _col1 + Group By Operator [GBY_370] (rows=262 width=178) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_369] (rows=25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_368] (rows=25 width=175) + predicate:value is not null + TableScan [TS_288] (rows=25 width=175) + Output:["key","value"] + <-Map 30 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_383] + PartitionCols:_col0, _col1 + Group By Operator [GBY_382] (rows=262 width=178) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_381] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_380] (rows=500 width=178) + predicate:value is not null + TableScan [TS_304] (rows=500 width=178) + Output:["key","value"] + <-Reducer 6 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_339] + PartitionCols:_col0, _col1 + Group By Operator [GBY_338] (rows=49 width=177) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_337] (rows=43 width=177) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 10 [CONTAINS] llap - Reduce Output Operator [RS_249] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] llap + Reduce Output Operator [RS_264] PartitionCols:_col0, _col1 - Group By Operator [GBY_248] (rows=43 width=177) + Group By Operator [GBY_263] (rows=43 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_246] (rows=48 width=177) + Select Operator [SEL_261] (rows=48 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_245] (rows=48 width=177) - Conds:RS_58._col3=RS_340._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_260] (rows=48 width=177) + Conds:RS_351._col0=RS_61._col3(Inner),Output:["_col2","_col3"] + <-Reducer 13 [SIMPLE_EDGE] llap + SHUFFLE [RS_61] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_229] (rows=39 width=266) + Conds:RS_332._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_330] + <-Map 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_336] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_334] <-Reducer 20 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_340] + SHUFFLE [RS_351] PartitionCols:_col0 - Select Operator [SEL_339] (rows=381 width=91) + Select Operator [SEL_350] (rows=381 width=91) Output:["_col0"] - Group By Operator [GBY_338] (rows=381 width=178) + Group By Operator [GBY_349] (rows=381 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 19 [SIMPLE_EDGE] <-Map 22 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_364] + Reduce Output Operator [RS_367] PartitionCols:_col0, _col1 - Group By Operator [GBY_363] (rows=381 width=178) + Group By Operator [GBY_366] (rows=381 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_362] (rows=500 width=178) + Select Operator [SEL_365] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=500 width=178) + Filter Operator [FIL_364] (rows=500 width=178) predicate:value is not null - TableScan [TS_279] (rows=500 width=178) + TableScan [TS_282] (rows=500 width=178) Output:["key","value"] <-Reducer 18 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_356] + Reduce Output Operator [RS_359] PartitionCols:_col0, _col1 - Group By Operator [GBY_355] (rows=381 width=178) + Group By Operator [GBY_358] (rows=381 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_354] (rows=262 width=178) + Select Operator [SEL_357] (rows=262 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_353] (rows=262 width=178) + Group By Operator [GBY_356] (rows=262 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 17 [SIMPLE_EDGE] <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_352] + Reduce Output Operator [RS_355] PartitionCols:_col0, _col1 - Group By Operator [GBY_351] (rows=262 width=178) + Group By Operator [GBY_354] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_350] (rows=25 width=175) + Select Operator [SEL_353] (rows=25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_349] (rows=25 width=175) + Filter Operator [FIL_352] (rows=25 width=175) predicate:value is not null - TableScan [TS_262] (rows=25 width=175) + TableScan [TS_265] (rows=25 width=175) Output:["key","value"] <-Map 21 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_360] + Reduce Output Operator [RS_363] PartitionCols:_col0, _col1 - Group By Operator [GBY_359] (rows=262 width=178) + Group By Operator [GBY_362] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_358] (rows=500 width=178) + Select Operator [SEL_361] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_357] (rows=500 width=178) + Filter Operator [FIL_360] (rows=500 width=178) predicate:value is not null - TableScan [TS_273] (rows=500 width=178) + TableScan [TS_276] (rows=500 width=178) Output:["key","value"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_226] (rows=39 width=266) - Conds:RS_322._col0=RS_326._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_322] - PartitionCols:_col0 - Select Operator [SEL_320] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_319] (rows=500 width=178) - predicate:key is not null - TableScan [TS_0] (rows=500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 11 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_326] - PartitionCols:_col0 - Select Operator [SEL_324] (rows=25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_323] (rows=25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_235] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_244] PartitionCols:_col0, _col1 - Group By Operator [GBY_234] (rows=43 width=177) + Group By Operator [GBY_243] (rows=43 width=177) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_232] (rows=39 width=177) + Select Operator [SEL_241] (rows=39 width=177) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_231] (rows=39 width=177) - Conds:RS_22._col3=RS_329._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] + Merge Join Operator [MERGEJOIN_240] (rows=39 width=177) + Conds:RS_328._col0=RS_24._col3(Inner),Output:["_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_225] (rows=39 width=266) - Conds:RS_321._col0=RS_325._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_321] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_320] - <-Map 11 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_325] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_324] - <-Reducer 14 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_329] + Please refer to the previous Merge Join Operator [MERGEJOIN_228] + <-Reducer 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_328] PartitionCols:_col0 - Select Operator [SEL_328] (rows=262 width=91) + Select Operator [SEL_327] (rows=262 width=91) Output:["_col0"] - Group By Operator [GBY_327] (rows=262 width=178) + Group By Operator [GBY_326] (rows=262 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 13 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_344] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_325] PartitionCols:_col0, _col1 - Group By Operator [GBY_343] (rows=262 width=178) + Group By Operator [GBY_324] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_342] (rows=25 width=175) + Select Operator [SEL_323] (rows=25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_341] (rows=25 width=175) + Filter Operator [FIL_322] (rows=25 width=175) predicate:value is not null - TableScan [TS_250] (rows=25 width=175) + TableScan [TS_234] (rows=25 width=175) Output:["key","value"] - <-Map 15 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_348] + <-Map 9 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_345] PartitionCols:_col0, _col1 - Group By Operator [GBY_347] (rows=262 width=178) + Group By Operator [GBY_344] (rows=262 width=178) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_346] (rows=500 width=178) + Select Operator [SEL_343] (rows=500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_345] (rows=500 width=178) + Filter Operator [FIL_342] (rows=500 width=178) predicate:value is not null - TableScan [TS_256] (rows=500 width=178) + TableScan [TS_249] (rows=500 width=178) Output:["key","value"] - <-Reducer 8 [CONTAINS] llap - Reduce Output Operator [RS_244] - PartitionCols:_col0, _col1 - Group By Operator [GBY_243] (rows=49 width=177) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_241] (rows=55 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_240] (rows=55 width=177) - Conds:RS_111._col3=RS_337._col0(Inner),Output:["_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_111] - PartitionCols:_col3 - Please refer to the previous Merge Join Operator [MERGEJOIN_225] - <-Reducer 29 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_337] - PartitionCols:_col0 - Select Operator [SEL_336] (rows=440 width=91) - Output:["_col0"] - Group By Operator [GBY_335] (rows=440 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 32 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_388] - PartitionCols:_col0, _col1 - Group By Operator [GBY_387] (rows=440 width=178) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_386] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_385] (rows=500 width=178) - predicate:value is not null - TableScan [TS_313] (rows=500 width=178) - Output:["key","value"] - <-Reducer 27 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_376] - PartitionCols:_col0, _col1 - Group By Operator [GBY_375] (rows=440 width=178) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_374] (rows=381 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_373] (rows=381 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 31 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_384] - PartitionCols:_col0, _col1 - Group By Operator [GBY_383] (rows=381 width=178) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_382] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_381] (rows=500 width=178) - predicate:value is not null - TableScan [TS_307] (rows=500 width=178) - Output:["key","value"] - <-Reducer 25 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_372] - PartitionCols:_col0, _col1 - Group By Operator [GBY_371] (rows=381 width=178) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_370] (rows=262 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_369] (rows=262 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_368] - PartitionCols:_col0, _col1 - Group By Operator [GBY_367] (rows=262 width=178) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_366] (rows=25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_365] (rows=25 width=175) - predicate:value is not null - TableScan [TS_285] (rows=25 width=175) - Output:["key","value"] - <-Map 30 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_380] - PartitionCols:_col0, _col1 - Group By Operator [GBY_379] (rows=262 width=178) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_378] (rows=500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_377] (rows=500 width=178) - predicate:value is not null - TableScan [TS_301] (rows=500 width=178) - Output:["key","value"] PREHOOK: query: EXPLAIN SELECT x.key, z.value, y.value @@ -938,43 +936,43 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_60] - Select Operator [SEL_59] (rows=605 width=10) + Map 2 vectorized, llap + File Output Operator [FS_61] + Select Operator [SEL_60] (rows=605 width=10) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_58] (rows=605 width=10) - Conds:MAPJOIN_57._col1=RS_54._col0(Inner),Output:["_col0","_col1","_col4"] - <-Map 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_54] + Map Join Operator [MAPJOIN_59] (rows=605 width=10) + Conds:RS_52._col0=MAPJOIN_58._col1(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_52] PartitionCols:_col0 - Select Operator [SEL_53] (rows=500 width=10) + Select Operator [SEL_51] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_52] (rows=500 width=10) + Filter Operator [FIL_50] (rows=500 width=10) predicate:key is not null - TableScan [TS_6] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_57] (rows=550 width=10) - Conds:SEL_56._col0=RS_51._col1(Inner),Output:["_col0","_col1"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_51] + <-Map Join Operator [MAPJOIN_58] (rows=550 width=10) + Conds:SEL_57._col0=RS_55._col1(Inner),Output:["_col0","_col1"] + <-Map 3 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_55] PartitionCols:_col1 - Select Operator [SEL_50] (rows=25 width=7) + Select Operator [SEL_54] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_49] (rows=25 width=7) + Filter Operator [FIL_53] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_6] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_56] (rows=500 width=10) + <-Select Operator [SEL_57] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_55] (rows=500 width=10) + Filter Operator [FIL_56] (rows=500 width=10) predicate:value is not null - TableScan [TS_0] (rows=500 width=10) + TableScan [TS_3] (rows=500 width=10) default@srcpart,z,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: EXPLAIN @@ -1059,117 +1057,115 @@ Plan optimized by CBO. Vertex dependency in root stage Map 2 <- Map 1 (BROADCAST_EDGE) -Map 3 <- Map 2 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) +Map 5 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) Map 9 <- Map 10 (BROADCAST_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized, llap - File Output Operator [FS_239] - Limit [LIM_238] (rows=100 width=10) + Reducer 7 vectorized, llap + File Output Operator [FS_240] + Limit [LIM_239] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_237] (rows=805 width=10) + Select Operator [SEL_238] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_236] - Group By Operator [GBY_235] (rows=805 width=10) + <-Reducer 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_237] + Group By Operator [GBY_236] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 3 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_234] + <-Map 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_235] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_233] (rows=1610 width=10) + Group By Operator [GBY_234] (rows=1610 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col9)","count(_col16)","count(_col3)"],keys:_col8, _col15, _col2 - Top N Key Operator [TNK_232] (rows=1610 width=10) + Top N Key Operator [TNK_233] (rows=1610 width=10) keys:_col8, _col15, _col2,top n:100 - Map Join Operator [MAPJOIN_231] (rows=1610 width=10) - Conds:RS_207._col1, _col3=SEL_230._col11, _col13(Inner),Output:["_col2","_col3","_col8","_col9","_col15","_col16"] + Map Join Operator [MAPJOIN_232] (rows=1610 width=10) + Conds:RS_215._col1, _col3=MAPJOIN_231._col11, _col13(Inner),Output:["_col2","_col3","_col8","_col9","_col15","_col16"] <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_207] + BROADCAST [RS_215] PartitionCols:_col1, _col3 - Map Join Operator [MAPJOIN_206] (rows=550 width=10) - Conds:RS_203._col0=SEL_205._col0(Inner),Output:["_col1","_col2","_col3"] + Map Join Operator [MAPJOIN_214] (rows=550 width=10) + Conds:RS_211._col0=SEL_213._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_203] + BROADCAST [RS_211] PartitionCols:_col0 - Select Operator [SEL_202] (rows=170 width=34) + Select Operator [SEL_210] (rows=170 width=34) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_201] (rows=170 width=34) + Filter Operator [FIL_209] (rows=170 width=34) predicate:(v2 is not null and v3 is not null and k1 is not null) TableScan [TS_0] (rows=170 width=34) default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Select Operator [SEL_205] (rows=500 width=10) + <-Select Operator [SEL_213] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_204] (rows=500 width=10) + Filter Operator [FIL_212] (rows=500 width=10) predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) TableScan [TS_3] (rows=500 width=10) default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_230] (rows=1464 width=10) - Output:["_col3","_col4","_col10","_col11","_col13"] - Map Join Operator [MAPJOIN_229] (rows=1464 width=10) - Conds:MAPJOIN_228._col3, _col5=RS_223._col2, _col4(Inner),Output:["_col1","_col2","_col10","_col11","_col13"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_223] - PartitionCols:_col2, _col4 - Map Join Operator [MAPJOIN_222] (rows=550 width=10) - Conds:SEL_221._col0=RS_219._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 10 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_219] - PartitionCols:_col0 - Select Operator [SEL_218] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_217] (rows=42 width=34) - predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_21] (rows=85 width=34) - default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_221] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_220] (rows=500 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_18] (rows=500 width=10) - default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_228] (rows=1331 width=10) - Conds:MAPJOIN_227._col2=RS_216._col0(Inner),Output:["_col1","_col2","_col3","_col5"] - <-Map 8 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_216] + <-Map Join Operator [MAPJOIN_231] (rows=1464 width=10) + Conds:MAPJOIN_230._col5, _col7=RS_225._col2, _col4(Inner),Output:["_col3","_col4","_col10","_col11","_col13"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_225] + PartitionCols:_col2, _col4 + Map Join Operator [MAPJOIN_224] (rows=550 width=10) + Conds:SEL_223._col0=RS_221._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 10 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_221] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_219] (rows=42 width=34) + predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_29] (rows=85 width=34) + default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_223] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_222] (rows=500 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_26] (rows=500 width=10) + default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_230] (rows=1331 width=10) + Conds:RS_208._col0=MAPJOIN_229._col3(Inner),Output:["_col3","_col4","_col5","_col7"] + <-Map 3 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=12 width=7) + Output:["_col0"] + Filter Operator [FIL_206] (rows=12 width=7) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_229] (rows=1210 width=10) + Conds:RS_205._col0=MAPJOIN_228._col1(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map 4 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_205] PartitionCols:_col0 - Select Operator [SEL_215] (rows=12 width=7) + Select Operator [SEL_204] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_214] (rows=12 width=7) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_227] (rows=1210 width=10) - Conds:MAPJOIN_226._col1=RS_213._col0(Inner),Output:["_col1","_col2","_col3","_col5"] - <-Map 7 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_213] - PartitionCols:_col0 - Select Operator [SEL_212] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_211] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_12] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_226] (rows=1100 width=10) - Conds:SEL_225._col0=RS_210._col3(Inner),Output:["_col1","_col2","_col3","_col5"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_210] - PartitionCols:_col3 - Select Operator [SEL_209] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_208] (rows=42 width=34) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_225] (rows=1000 width=10) - Output:["_col0"] - Filter Operator [FIL_224] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Filter Operator [FIL_203] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_9] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_228] (rows=1100 width=10) + Conds:SEL_227._col0=RS_218._col3(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 8 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_218] + PartitionCols:_col3 + Select Operator [SEL_217] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_216] (rows=42 width=34) + predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_15] (rows=85 width=34) + default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_227] (rows=1000 width=10) + Output:["_col0"] + Filter Operator [FIL_226] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_12] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -1198,131 +1194,131 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE) -Map 10 <- Union 11 (CONTAINS) -Map 13 <- Union 11 (CONTAINS) -Map 3 <- Union 4 (CONTAINS) -Map 8 <- Union 4 (CONTAINS) -Map 9 <- Map 2 (BROADCAST_EDGE) -Reducer 12 <- Map 9 (BROADCAST_EDGE), Union 11 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE) +Map 1 <- Union 2 (CONTAINS) +Map 12 <- Union 10 (CONTAINS) +Map 13 <- Map 8 (BROADCAST_EDGE) +Map 6 <- Union 2 (CONTAINS) +Map 7 <- Map 8 (BROADCAST_EDGE) +Map 9 <- Union 10 (CONTAINS) +Reducer 11 <- Map 13 (BROADCAST_EDGE), Union 10 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 7 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized, llap - File Output Operator [FS_189] - Group By Operator [GBY_188] (rows=605 width=10) + Reducer 5 vectorized, llap + File Output Operator [FS_191] + Group By Operator [GBY_190] (rows=605 width=10) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_207] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_209] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_206] (rows=1210 width=10) + Group By Operator [GBY_208] (rows=1210 width=10) Output:["_col0","_col1","_col2"],keys:_col0, _col1, _col2 - Select Operator [SEL_205] (rows=605 width=10) + Select Operator [SEL_207] (rows=605 width=10) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_204] (rows=605 width=10) - Conds:RS_201._col3=SEL_203._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_201] + Map Join Operator [MAPJOIN_206] (rows=605 width=10) + Conds:SEL_205._col0=RS_203._col3(Inner),Output:["_col0","_col2","_col3"] + <-Map 13 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_203] PartitionCols:_col3 - Map Join Operator [MAPJOIN_200] (rows=550 width=10) - Conds:SEL_199._col0=RS_177._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_177] + Map Join Operator [MAPJOIN_202] (rows=550 width=10) + Conds:SEL_201._col0=RS_179._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_179] PartitionCols:_col0 - Select Operator [SEL_175] (rows=25 width=7) + Select Operator [SEL_177] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_174] (rows=25 width=7) + Filter Operator [FIL_176] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_16] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_199] (rows=500 width=10) + <-Select Operator [SEL_201] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_198] (rows=500 width=10) + Filter Operator [FIL_200] (rows=500 width=10) predicate:key is not null - TableScan [TS_26] (rows=500 width=10) + TableScan [TS_40] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_203] (rows=262 width=10) + <-Select Operator [SEL_205] (rows=262 width=10) Output:["_col0"] - Group By Operator [GBY_202] (rows=262 width=10) + Group By Operator [GBY_204] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_197] + <-Union 10 [SIMPLE_EDGE] + <-Map 12 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_213] PartitionCols:_col0, _col1 - Group By Operator [GBY_196] (rows=525 width=10) + Group By Operator [GBY_212] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_195] (rows=25 width=7) + Select Operator [SEL_211] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_194] (rows=25 width=7) + Filter Operator [FIL_210] (rows=500 width=10) predicate:value is not null - TableScan [TS_150] (rows=25 width=7) + TableScan [TS_166] (rows=500 width=10) Output:["key","value"] - <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_211] + <-Map 9 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_199] PartitionCols:_col0, _col1 - Group By Operator [GBY_210] (rows=525 width=10) + Group By Operator [GBY_198] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_209] (rows=500 width=10) + Select Operator [SEL_197] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_208] (rows=500 width=10) + Filter Operator [FIL_196] (rows=25 width=7) predicate:value is not null - TableScan [TS_164] (rows=500 width=10) + TableScan [TS_152] (rows=25 width=7) Output:["key","value"] - <-Reducer 5 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_187] + <-Reducer 3 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_189] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_186] (rows=1210 width=10) + Group By Operator [GBY_188] (rows=1210 width=10) Output:["_col0","_col1","_col2"],keys:_col0, _col1, _col2 - Select Operator [SEL_185] (rows=605 width=10) + Select Operator [SEL_187] (rows=605 width=10) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_184] (rows=605 width=10) - Conds:RS_181._col3=SEL_183._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_181] + Map Join Operator [MAPJOIN_186] (rows=605 width=10) + Conds:SEL_185._col0=RS_183._col3(Inner),Output:["_col0","_col2","_col3"] + <-Map 7 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_183] PartitionCols:_col3 - Map Join Operator [MAPJOIN_180] (rows=550 width=10) - Conds:SEL_179._col0=RS_176._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_176] + Map Join Operator [MAPJOIN_182] (rows=550 width=10) + Conds:SEL_181._col0=RS_178._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_178] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_175] - <-Select Operator [SEL_179] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_177] + <-Select Operator [SEL_181] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_178] (rows=500 width=10) + Filter Operator [FIL_180] (rows=500 width=10) predicate:key is not null - TableScan [TS_0] (rows=500 width=10) + TableScan [TS_13] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_183] (rows=262 width=10) + <-Select Operator [SEL_185] (rows=262 width=10) Output:["_col0"] - Group By Operator [GBY_182] (rows=262 width=10) + Group By Operator [GBY_184] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Map 3 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_173] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_175] PartitionCols:_col0, _col1 - Group By Operator [GBY_172] (rows=525 width=10) + Group By Operator [GBY_174] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_171] (rows=25 width=7) + Select Operator [SEL_173] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_170] (rows=25 width=7) + Filter Operator [FIL_172] (rows=25 width=7) predicate:value is not null - TableScan [TS_130] (rows=25 width=7) + TableScan [TS_132] (rows=25 width=7) Output:["key","value"] - <-Map 8 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_193] + <-Map 6 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_195] PartitionCols:_col0, _col1 - Group By Operator [GBY_192] (rows=525 width=10) + Group By Operator [GBY_194] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_191] (rows=500 width=10) + Select Operator [SEL_193] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_190] (rows=500 width=10) + Filter Operator [FIL_192] (rows=500 width=10) predicate:value is not null - TableScan [TS_144] (rows=500 width=10) + TableScan [TS_146] (rows=500 width=10) Output:["key","value"] PREHOOK: query: explain @@ -1360,252 +1356,252 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE) -Map 10 <- Union 4 (CONTAINS) -Map 11 <- Map 2 (BROADCAST_EDGE) -Map 12 <- Union 13 (CONTAINS) -Map 17 <- Union 13 (CONTAINS) -Map 18 <- Union 15 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Map 11 <- Union 12 (CONTAINS) +Map 16 <- Union 12 (CONTAINS) +Map 17 <- Union 14 (CONTAINS) +Map 18 <- Map 10 (BROADCAST_EDGE) Map 19 <- Union 20 (CONTAINS) Map 26 <- Union 20 (CONTAINS) Map 27 <- Union 22 (CONTAINS) Map 28 <- Union 24 (CONTAINS) -Map 3 <- Union 4 (CONTAINS) -Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) -Reducer 16 <- Map 11 (BROADCAST_EDGE), Union 15 (SIMPLE_EDGE), Union 6 (CONTAINS) +Map 8 <- Union 2 (CONTAINS) +Map 9 <- Map 10 (BROADCAST_EDGE) +Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS) +Reducer 15 <- Map 18 (BROADCAST_EDGE), Union 14 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 21 <- Union 20 (SIMPLE_EDGE), Union 22 (CONTAINS) Reducer 23 <- Union 22 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 25 <- Map 1 (BROADCAST_EDGE), Union 24 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 9 <- Union 8 (SIMPLE_EDGE) +Reducer 25 <- Map 9 (BROADCAST_EDGE), Union 24 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 3 <- Map 9 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 vectorized, llap - File Output Operator [FS_356] - Group By Operator [GBY_355] (rows=605 width=10) + Reducer 7 vectorized, llap + File Output Operator [FS_359] + Group By Operator [GBY_358] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 8 [SIMPLE_EDGE] + <-Union 6 [SIMPLE_EDGE] <-Reducer 25 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_404] + Reduce Output Operator [RS_407] PartitionCols:_col0, _col1 - Group By Operator [GBY_403] (rows=1210 width=10) + Group By Operator [GBY_406] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_402] (rows=605 width=10) + Select Operator [SEL_405] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_401] (rows=605 width=10) - Conds:RS_345._col3=SEL_400._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_345] + Map Join Operator [MAPJOIN_404] (rows=605 width=10) + Conds:SEL_403._col0=RS_348._col3(Inner),Output:["_col2","_col3"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_348] PartitionCols:_col3 - Map Join Operator [MAPJOIN_343] (rows=550 width=10) - Conds:SEL_342._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_339] + Map Join Operator [MAPJOIN_346] (rows=550 width=10) + Conds:SEL_345._col0=RS_342._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_342] PartitionCols:_col0 - Select Operator [SEL_338] (rows=25 width=7) + Select Operator [SEL_341] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_337] (rows=25 width=7) + Filter Operator [FIL_340] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_16] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_342] (rows=500 width=10) + <-Select Operator [SEL_345] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_341] (rows=500 width=10) + Filter Operator [FIL_344] (rows=500 width=10) predicate:key is not null - TableScan [TS_0] (rows=500 width=10) + TableScan [TS_13] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_400] (rows=440 width=10) + <-Select Operator [SEL_403] (rows=440 width=10) Output:["_col0"] - Group By Operator [GBY_399] (rows=440 width=10) + Group By Operator [GBY_402] (rows=440 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 24 [SIMPLE_EDGE] <-Map 28 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_416] + Reduce Output Operator [RS_419] PartitionCols:_col0, _col1 - Group By Operator [GBY_415] (rows=881 width=10) + Group By Operator [GBY_418] (rows=881 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_414] (rows=500 width=10) + Select Operator [SEL_417] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_413] (rows=500 width=10) + Filter Operator [FIL_416] (rows=500 width=10) predicate:value is not null - TableScan [TS_327] (rows=500 width=10) + TableScan [TS_330] (rows=500 width=10) Output:["key","value"] <-Reducer 23 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_398] + Reduce Output Operator [RS_401] PartitionCols:_col0, _col1 - Group By Operator [GBY_397] (rows=881 width=10) + Group By Operator [GBY_400] (rows=881 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_396] (rows=381 width=10) + Select Operator [SEL_399] (rows=381 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_395] (rows=381 width=10) + Group By Operator [GBY_398] (rows=381 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 22 [SIMPLE_EDGE] <-Map 27 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_412] + Reduce Output Operator [RS_415] PartitionCols:_col0, _col1 - Group By Operator [GBY_411] (rows=762 width=10) + Group By Operator [GBY_414] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_410] (rows=500 width=10) + Select Operator [SEL_413] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_409] (rows=500 width=10) + Filter Operator [FIL_412] (rows=500 width=10) predicate:value is not null - TableScan [TS_321] (rows=500 width=10) + TableScan [TS_324] (rows=500 width=10) Output:["key","value"] <-Reducer 21 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_394] + Reduce Output Operator [RS_397] PartitionCols:_col0, _col1 - Group By Operator [GBY_393] (rows=762 width=10) + Group By Operator [GBY_396] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_392] (rows=262 width=10) + Select Operator [SEL_395] (rows=262 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_391] (rows=262 width=10) + Group By Operator [GBY_394] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 20 [SIMPLE_EDGE] <-Map 19 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_390] + Reduce Output Operator [RS_393] PartitionCols:_col0, _col1 - Group By Operator [GBY_389] (rows=525 width=10) + Group By Operator [GBY_392] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_388] (rows=25 width=7) + Select Operator [SEL_391] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_387] (rows=25 width=7) + Filter Operator [FIL_390] (rows=25 width=7) predicate:value is not null - TableScan [TS_291] (rows=25 width=7) + TableScan [TS_294] (rows=25 width=7) Output:["key","value"] <-Map 26 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_408] + Reduce Output Operator [RS_411] PartitionCols:_col0, _col1 - Group By Operator [GBY_407] (rows=525 width=10) + Group By Operator [GBY_410] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_406] (rows=500 width=10) + Select Operator [SEL_409] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_405] (rows=500 width=10) + Filter Operator [FIL_408] (rows=500 width=10) predicate:value is not null - TableScan [TS_315] (rows=500 width=10) + TableScan [TS_318] (rows=500 width=10) Output:["key","value"] - <-Reducer 7 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_354] + <-Reducer 5 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_357] PartitionCols:_col0, _col1 - Group By Operator [GBY_353] (rows=1210 width=10) + Group By Operator [GBY_356] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_352] (rows=605 width=10) + Group By Operator [GBY_355] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_378] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_381] PartitionCols:_col0, _col1 - Group By Operator [GBY_377] (rows=1210 width=10) + Group By Operator [GBY_380] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_376] (rows=605 width=10) + Select Operator [SEL_379] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_375] (rows=605 width=10) - Conds:RS_372._col3=SEL_374._col0(Inner),Output:["_col1","_col2"] - <-Map 11 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_372] + Map Join Operator [MAPJOIN_378] (rows=605 width=10) + Conds:SEL_377._col0=RS_375._col3(Inner),Output:["_col2","_col3"] + <-Map 18 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_375] PartitionCols:_col3 - Map Join Operator [MAPJOIN_371] (rows=550 width=10) - Conds:SEL_370._col0=RS_340._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_340] + Map Join Operator [MAPJOIN_374] (rows=550 width=10) + Conds:SEL_373._col0=RS_343._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_343] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_338] - <-Select Operator [SEL_370] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_341] + <-Select Operator [SEL_373] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_369] (rows=500 width=10) + Filter Operator [FIL_372] (rows=500 width=10) predicate:key is not null - TableScan [TS_26] (rows=500 width=10) + TableScan [TS_50] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_374] (rows=381 width=10) + <-Select Operator [SEL_377] (rows=381 width=10) Output:["_col0"] - Group By Operator [GBY_373] (rows=381 width=10) + Group By Operator [GBY_376] (rows=381 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 15 [SIMPLE_EDGE] - <-Map 18 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_386] + <-Union 14 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_389] PartitionCols:_col0, _col1 - Group By Operator [GBY_385] (rows=762 width=10) + Group By Operator [GBY_388] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_384] (rows=500 width=10) + Select Operator [SEL_387] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_383] (rows=500 width=10) + Filter Operator [FIL_386] (rows=500 width=10) predicate:value is not null - TableScan [TS_285] (rows=500 width=10) + TableScan [TS_288] (rows=500 width=10) Output:["key","value"] - <-Reducer 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_368] + <-Reducer 13 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_371] PartitionCols:_col0, _col1 - Group By Operator [GBY_367] (rows=762 width=10) + Group By Operator [GBY_370] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_366] (rows=262 width=10) + Select Operator [SEL_369] (rows=262 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_365] (rows=262 width=10) + Group By Operator [GBY_368] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 13 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_364] + <-Union 12 [SIMPLE_EDGE] + <-Map 11 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_367] PartitionCols:_col0, _col1 - Group By Operator [GBY_363] (rows=525 width=10) + Group By Operator [GBY_366] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_362] (rows=25 width=7) + Select Operator [SEL_365] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=25 width=7) + Filter Operator [FIL_364] (rows=25 width=7) predicate:value is not null - TableScan [TS_260] (rows=25 width=7) + TableScan [TS_263] (rows=25 width=7) Output:["key","value"] - <-Map 17 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_382] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_385] PartitionCols:_col0, _col1 - Group By Operator [GBY_381] (rows=525 width=10) + Group By Operator [GBY_384] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_380] (rows=500 width=10) + Select Operator [SEL_383] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_379] (rows=500 width=10) + Filter Operator [FIL_382] (rows=500 width=10) predicate:value is not null - TableScan [TS_279] (rows=500 width=10) + TableScan [TS_282] (rows=500 width=10) Output:["key","value"] - <-Reducer 5 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_351] + <-Reducer 3 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_354] PartitionCols:_col0, _col1 - Group By Operator [GBY_350] (rows=1210 width=10) + Group By Operator [GBY_353] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_349] (rows=605 width=10) + Select Operator [SEL_352] (rows=605 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_348] (rows=605 width=10) - Conds:RS_344._col3=SEL_347._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_344] + Map Join Operator [MAPJOIN_351] (rows=605 width=10) + Conds:SEL_350._col0=RS_347._col3(Inner),Output:["_col2","_col3"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_347] PartitionCols:_col3 - Please refer to the previous Map Join Operator [MAPJOIN_343] - <-Select Operator [SEL_347] (rows=262 width=10) + Please refer to the previous Map Join Operator [MAPJOIN_346] + <-Select Operator [SEL_350] (rows=262 width=10) Output:["_col0"] - Group By Operator [GBY_346] (rows=262 width=10) + Group By Operator [GBY_349] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 4 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_360] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_339] PartitionCols:_col0, _col1 - Group By Operator [GBY_359] (rows=525 width=10) + Group By Operator [GBY_338] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_358] (rows=500 width=10) + Select Operator [SEL_337] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_357] (rows=500 width=10) + Filter Operator [FIL_336] (rows=25 width=7) predicate:value is not null - TableScan [TS_254] (rows=500 width=10) + TableScan [TS_239] (rows=25 width=7) Output:["key","value"] - <-Map 3 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_336] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_363] PartitionCols:_col0, _col1 - Group By Operator [GBY_335] (rows=525 width=10) + Group By Operator [GBY_362] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_334] (rows=25 width=7) + Select Operator [SEL_361] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_333] (rows=25 width=7) + Filter Operator [FIL_360] (rows=500 width=10) predicate:value is not null - TableScan [TS_236] (rows=25 width=7) + TableScan [TS_257] (rows=500 width=10) Output:["key","value"] PREHOOK: query: CREATE TABLE srcbucket_mapjoin_n22(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE @@ -2139,214 +2135,214 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 6 (BROADCAST_EDGE) +Map 1 <- Union 2 (CONTAINS) Map 11 <- Union 12 (CONTAINS) Map 13 <- Union 12 (CONTAINS) Map 14 <- Union 12 (CONTAINS) -Map 15 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 16 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 17 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 18 <- Map 19 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 7 <- Union 8 (CONTAINS) -Map 9 <- Union 8 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS), Union 8 (SIMPLE_EDGE) -Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 12 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 15 <- Map 19 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 16 <- Map 19 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 17 <- Map 19 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 18 <- Map 19 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Map 6 <- Map 9 (BROADCAST_EDGE) +Reducer 3 <- Map 6 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 12 (SIMPLE_EDGE), Union 4 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Union 3 + Union 4 <-Map 15 [CONTAINS] vectorized, llap - File Output Operator [FS_313] - Select Operator [SEL_312] (rows=1844 width=10) + File Output Operator [FS_314] + Select Operator [SEL_313] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_311] (rows=1844 width=10) - Conds:MAPJOIN_310._col1=RS_304._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_312] (rows=1844 width=10) + Conds:MAPJOIN_311._col1=RS_305._col0(Inner),Output:["_col1","_col4"] <-Map 19 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_304] + BROADCAST [RS_305] PartitionCols:_col0 - Select Operator [SEL_303] (rows=25 width=7) + Select Operator [SEL_304] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_302] (rows=25 width=7) + Filter Operator [FIL_303] (rows=25 width=7) predicate:key is not null - TableScan [TS_63] (rows=25 width=7) + TableScan [TS_64] (rows=25 width=7) default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_310] (rows=1677 width=10) - Conds:SEL_309._col0=RS_275._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_275] + <-Map Join Operator [MAPJOIN_311] (rows=1677 width=10) + Conds:SEL_310._col0=RS_279._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_279] PartitionCols:_col1 - Select Operator [SEL_273] (rows=25 width=7) + Select Operator [SEL_277] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_272] (rows=25 width=7) + Filter Operator [FIL_276] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_11] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_309] (rows=25 width=7) + <-Select Operator [SEL_310] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_308] (rows=25 width=7) + Filter Operator [FIL_309] (rows=25 width=7) predicate:value is not null - TableScan [TS_228] (rows=25 width=7) + TableScan [TS_229] (rows=25 width=7) Output:["value"] <-Map 16 [CONTAINS] vectorized, llap - File Output Operator [FS_319] - Select Operator [SEL_318] (rows=1844 width=10) + File Output Operator [FS_320] + Select Operator [SEL_319] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_317] (rows=1844 width=10) - Conds:MAPJOIN_316._col1=RS_305._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_318] (rows=1844 width=10) + Conds:MAPJOIN_317._col1=RS_306._col0(Inner),Output:["_col1","_col4"] <-Map 19 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_305] + BROADCAST [RS_306] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_303] - <-Map Join Operator [MAPJOIN_316] (rows=1677 width=10) - Conds:SEL_315._col0=RS_276._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_276] + Please refer to the previous Select Operator [SEL_304] + <-Map Join Operator [MAPJOIN_317] (rows=1677 width=10) + Conds:SEL_316._col0=RS_280._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_280] PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_273] - <-Select Operator [SEL_315] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_277] + <-Select Operator [SEL_316] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_314] (rows=500 width=10) + Filter Operator [FIL_315] (rows=500 width=10) predicate:value is not null - TableScan [TS_239] (rows=500 width=10) + TableScan [TS_240] (rows=500 width=10) Output:["value"] <-Map 17 [CONTAINS] vectorized, llap - File Output Operator [FS_325] - Select Operator [SEL_324] (rows=1844 width=10) + File Output Operator [FS_326] + Select Operator [SEL_325] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_323] (rows=1844 width=10) - Conds:MAPJOIN_322._col1=RS_306._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_324] (rows=1844 width=10) + Conds:MAPJOIN_323._col1=RS_307._col0(Inner),Output:["_col1","_col4"] <-Map 19 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_306] + BROADCAST [RS_307] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_303] - <-Map Join Operator [MAPJOIN_322] (rows=1677 width=10) - Conds:SEL_321._col0=RS_277._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_277] + Please refer to the previous Select Operator [SEL_304] + <-Map Join Operator [MAPJOIN_323] (rows=1677 width=10) + Conds:SEL_322._col0=RS_281._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_281] PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_273] - <-Select Operator [SEL_321] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_277] + <-Select Operator [SEL_322] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_320] (rows=500 width=10) + Filter Operator [FIL_321] (rows=500 width=10) predicate:value is not null - TableScan [TS_250] (rows=500 width=10) + TableScan [TS_251] (rows=500 width=10) Output:["value"] <-Map 18 [CONTAINS] vectorized, llap - File Output Operator [FS_331] - Select Operator [SEL_330] (rows=1844 width=10) + File Output Operator [FS_332] + Select Operator [SEL_331] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_329] (rows=1844 width=10) - Conds:MAPJOIN_328._col1=RS_307._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_330] (rows=1844 width=10) + Conds:MAPJOIN_329._col1=RS_308._col0(Inner),Output:["_col1","_col4"] <-Map 19 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_307] + BROADCAST [RS_308] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_303] - <-Map Join Operator [MAPJOIN_328] (rows=1677 width=10) - Conds:SEL_327._col0=RS_278._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_278] + Please refer to the previous Select Operator [SEL_304] + <-Map Join Operator [MAPJOIN_329] (rows=1677 width=10) + Conds:SEL_328._col0=RS_282._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_282] PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_273] - <-Select Operator [SEL_327] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_277] + <-Select Operator [SEL_328] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_326] (rows=500 width=10) + Filter Operator [FIL_327] (rows=500 width=10) predicate:value is not null - TableScan [TS_261] (rows=500 width=10) + TableScan [TS_262] (rows=500 width=10) Output:["value"] - <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_198] - Select Operator [SEL_196] (rows=605 width=10) + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_204] + Select Operator [SEL_202] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_195] (rows=605 width=10) - Conds:RS_283._col3=Union 8._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_283] + Merge Join Operator [MERGEJOIN_201] (rows=605 width=10) + Conds:Union 2._col0=RS_287._col3(Inner),Output:["_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_287] PartitionCols:_col3 - Map Join Operator [MAPJOIN_281] (rows=550 width=10) - Conds:SEL_280._col0=RS_274._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_274] + Map Join Operator [MAPJOIN_285] (rows=550 width=10) + Conds:SEL_284._col0=RS_278._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_278] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_273] - <-Select Operator [SEL_280] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_277] + <-Select Operator [SEL_284] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_279] (rows=500 width=10) + Filter Operator [FIL_283] (rows=500 width=10) predicate:key is not null - TableScan [TS_0] (rows=500 width=10) + TableScan [TS_8] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 8 [SIMPLE_EDGE] - <-Map 7 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_289] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_275] PartitionCols:_col0 - Select Operator [SEL_288] (rows=25 width=7) + Select Operator [SEL_274] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_287] (rows=25 width=7) + Filter Operator [FIL_273] (rows=25 width=7) predicate:value is not null - TableScan [TS_203] (rows=25 width=7) + TableScan [TS_196] (rows=25 width=7) Output:["value"] - <-Map 9 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_292] + <-Map 5 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_290] PartitionCols:_col0 - Select Operator [SEL_291] (rows=500 width=10) + Select Operator [SEL_289] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_290] (rows=500 width=10) + Filter Operator [FIL_288] (rows=500 width=10) predicate:value is not null - TableScan [TS_208] (rows=500 width=10) + TableScan [TS_205] (rows=500 width=10) Output:["value"] - <-Reducer 5 [CONTAINS] llap - File Output Operator [FS_202] - Select Operator [SEL_200] (rows=1127 width=10) + <-Reducer 8 [CONTAINS] llap + File Output Operator [FS_213] + Select Operator [SEL_211] (rows=1127 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_199] (rows=1127 width=10) - Conds:RS_41._col3=Union 12._col0(Inner),Output:["_col1","_col2"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] + Merge Join Operator [MERGEJOIN_210] (rows=1127 width=10) + Conds:RS_42._col3=Union 12._col0(Inner),Output:["_col1","_col2"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_181] (rows=550 width=10) - Conds:RS_282._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_282] + Merge Join Operator [MERGEJOIN_182] (rows=550 width=10) + Conds:RS_286._col0=RS_293._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_286] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_280] + Please refer to the previous Select Operator [SEL_284] <-Map 10 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_286] + SHUFFLE [RS_293] PartitionCols:_col0 - Select Operator [SEL_285] (rows=500 width=10) + Select Operator [SEL_292] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_284] (rows=500 width=10) + Filter Operator [FIL_291] (rows=500 width=10) predicate:(key is not null and value is not null) - TableScan [TS_24] (rows=500 width=10) + TableScan [TS_25] (rows=500 width=10) default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Union 12 [SIMPLE_EDGE] <-Map 11 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_295] + Reduce Output Operator [RS_296] PartitionCols:_col0 - Select Operator [SEL_294] (rows=25 width=7) + Select Operator [SEL_295] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_293] (rows=25 width=7) + Filter Operator [FIL_294] (rows=25 width=7) predicate:value is not null - TableScan [TS_213] (rows=25 width=7) + TableScan [TS_214] (rows=25 width=7) Output:["value"] <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_298] + Reduce Output Operator [RS_299] PartitionCols:_col0 - Select Operator [SEL_297] (rows=500 width=10) + Select Operator [SEL_298] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_296] (rows=500 width=10) + Filter Operator [FIL_297] (rows=500 width=10) predicate:value is not null - TableScan [TS_218] (rows=500 width=10) + TableScan [TS_219] (rows=500 width=10) Output:["value"] <-Map 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_301] + Reduce Output Operator [RS_302] PartitionCols:_col0 - Select Operator [SEL_300] (rows=500 width=10) + Select Operator [SEL_301] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_299] (rows=500 width=10) + Filter Operator [FIL_300] (rows=500 width=10) predicate:value is not null - TableScan [TS_223] (rows=500 width=10) + TableScan [TS_224] (rows=500 width=10) Output:["value"] PREHOOK: query: explain @@ -2384,280 +2380,280 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 9 (BROADCAST_EDGE) -Map 10 <- Union 11 (CONTAINS) -Map 13 <- Union 11 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Map 10 <- Map 13 (BROADCAST_EDGE) +Map 13 <- Map 32 (BROADCAST_EDGE) Map 15 <- Union 16 (CONTAINS) Map 20 <- Union 16 (CONTAINS) Map 21 <- Union 18 (CONTAINS) -Map 23 <- Union 24 (CONTAINS) -Map 30 <- Union 24 (CONTAINS) -Map 31 <- Union 26 (CONTAINS) -Map 32 <- Union 28 (CONTAINS) -Map 9 <- Map 22 (BROADCAST_EDGE) -Reducer 12 <- Union 11 (SIMPLE_EDGE) +Map 22 <- Union 23 (CONTAINS) +Map 29 <- Union 23 (CONTAINS) +Map 30 <- Union 25 (CONTAINS) +Map 31 <- Union 27 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 17 <- Union 16 (SIMPLE_EDGE), Union 18 (CONTAINS) Reducer 19 <- Union 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Map 9 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE) -Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 8 <- Reducer 19 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS) +Reducer 26 <- Union 25 (SIMPLE_EDGE), Union 27 (CONTAINS) +Reducer 28 <- Map 13 (BROADCAST_EDGE), Union 27 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized, llap - File Output Operator [FS_347] - Group By Operator [GBY_346] (rows=544 width=10) + Reducer 8 vectorized, llap + File Output Operator [FS_353] + Group By Operator [GBY_352] (rows=544 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 29 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_395] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 28 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_397] PartitionCols:_col0, _col1 - Group By Operator [GBY_394] (rows=1089 width=10) + Group By Operator [GBY_396] (rows=1089 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_393] (rows=484 width=10) + Select Operator [SEL_395] (rows=484 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_392] (rows=484 width=10) - Conds:RS_334._col3=SEL_391._col0(Inner),Output:["_col1","_col2"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_334] + Map Join Operator [MAPJOIN_394] (rows=484 width=10) + Conds:SEL_393._col0=RS_343._col3(Inner),Output:["_col2","_col3"] + <-Map 13 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_343] PartitionCols:_col3 - Map Join Operator [MAPJOIN_333] (rows=27 width=7) - Conds:SEL_331._col0=RS_327._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_327] + Map Join Operator [MAPJOIN_342] (rows=27 width=7) + Conds:SEL_340._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 32 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_336] PartitionCols:_col0 - Select Operator [SEL_326] (rows=25 width=7) + Select Operator [SEL_335] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_325] (rows=25 width=7) + Filter Operator [FIL_334] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_72] (rows=25 width=7) + TableScan [TS_106] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_331] (rows=25 width=7) + <-Select Operator [SEL_340] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=25 width=7) + Filter Operator [FIL_338] (rows=25 width=7) predicate:key is not null - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_16] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_391] (rows=440 width=10) + <-Select Operator [SEL_393] (rows=440 width=10) Output:["_col0"] - Group By Operator [GBY_390] (rows=440 width=10) + Group By Operator [GBY_392] (rows=440 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 32 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_407] + <-Union 27 [SIMPLE_EDGE] + <-Map 31 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_409] PartitionCols:_col0, _col1 - Group By Operator [GBY_406] (rows=881 width=10) + Group By Operator [GBY_408] (rows=881 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_405] (rows=500 width=10) + Select Operator [SEL_407] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_404] (rows=500 width=10) + Filter Operator [FIL_406] (rows=500 width=10) predicate:value is not null - TableScan [TS_319] (rows=500 width=10) + TableScan [TS_321] (rows=500 width=10) Output:["key","value"] - <-Reducer 27 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_389] + <-Reducer 26 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_391] PartitionCols:_col0, _col1 - Group By Operator [GBY_388] (rows=881 width=10) + Group By Operator [GBY_390] (rows=881 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_387] (rows=381 width=10) + Select Operator [SEL_389] (rows=381 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_386] (rows=381 width=10) + Group By Operator [GBY_388] (rows=381 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 31 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_403] + <-Union 25 [SIMPLE_EDGE] + <-Map 30 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_405] PartitionCols:_col0, _col1 - Group By Operator [GBY_402] (rows=762 width=10) + Group By Operator [GBY_404] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_401] (rows=500 width=10) + Select Operator [SEL_403] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_400] (rows=500 width=10) + Filter Operator [FIL_402] (rows=500 width=10) predicate:value is not null - TableScan [TS_313] (rows=500 width=10) + TableScan [TS_315] (rows=500 width=10) Output:["key","value"] - <-Reducer 25 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_385] + <-Reducer 24 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_387] PartitionCols:_col0, _col1 - Group By Operator [GBY_384] (rows=762 width=10) + Group By Operator [GBY_386] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_383] (rows=262 width=10) + Select Operator [SEL_385] (rows=262 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_382] (rows=262 width=10) + Group By Operator [GBY_384] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_381] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_383] PartitionCols:_col0, _col1 - Group By Operator [GBY_380] (rows=525 width=10) + Group By Operator [GBY_382] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_379] (rows=25 width=7) + Select Operator [SEL_381] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_378] (rows=25 width=7) + Filter Operator [FIL_380] (rows=25 width=7) predicate:value is not null - TableScan [TS_283] (rows=25 width=7) + TableScan [TS_285] (rows=25 width=7) Output:["key","value"] - <-Map 30 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_399] + <-Map 29 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_401] PartitionCols:_col0, _col1 - Group By Operator [GBY_398] (rows=525 width=10) + Group By Operator [GBY_400] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_397] (rows=500 width=10) + Select Operator [SEL_399] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_396] (rows=500 width=10) + Filter Operator [FIL_398] (rows=500 width=10) predicate:value is not null - TableScan [TS_307] (rows=500 width=10) + TableScan [TS_309] (rows=500 width=10) Output:["key","value"] - <-Reducer 4 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_345] + <-Reducer 6 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_351] PartitionCols:_col0, _col1 - Group By Operator [GBY_344] (rows=1089 width=10) + Group By Operator [GBY_350] (rows=1089 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_343] (rows=605 width=10) + Group By Operator [GBY_349] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_238] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] llap + Reduce Output Operator [RS_261] PartitionCols:_col0, _col1 - Group By Operator [GBY_237] (rows=1210 width=10) + Group By Operator [GBY_260] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_235] (rows=605 width=10) + Select Operator [SEL_258] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_234] (rows=605 width=10) - Conds:RS_339._col3=RS_342._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_339] + Merge Join Operator [MERGEJOIN_257] (rows=605 width=10) + Conds:RS_59._col3=RS_363._col0(Inner),Output:["_col1","_col2"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_59] PartitionCols:_col3 - Map Join Operator [MAPJOIN_337] (rows=550 width=10) - Conds:SEL_336._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_332] + Merge Join Operator [MERGEJOIN_228] (rows=550 width=10) + Conds:RS_347._col0=RS_360._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_347] PartitionCols:_col0 - Select Operator [SEL_330] (rows=25 width=7) + Select Operator [SEL_345] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_328] (rows=25 width=7) + Filter Operator [FIL_344] (rows=500 width=10) + predicate:key is not null + TableScan [TS_13] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 14 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_360] + PartitionCols:_col0 + Select Operator [SEL_359] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_358] (rows=500 width=10) predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_3] - <-Select Operator [SEL_336] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_335] (rows=500 width=10) - predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 12 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_342] - PartitionCols:_col0 - Select Operator [SEL_341] (rows=262 width=10) - Output:["_col0"] - Group By Operator [GBY_340] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_357] - PartitionCols:_col0, _col1 - Group By Operator [GBY_356] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_355] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_354] (rows=25 width=7) - predicate:value is not null - TableScan [TS_248] (rows=25 width=7) - Output:["key","value"] - <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_361] - PartitionCols:_col0, _col1 - Group By Operator [GBY_360] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_359] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_358] (rows=500 width=10) - predicate:value is not null - TableScan [TS_254] (rows=500 width=10) - Output:["key","value"] - <-Reducer 8 [CONTAINS] llap - Reduce Output Operator [RS_247] - PartitionCols:_col0, _col1 - Group By Operator [GBY_246] (rows=1210 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_244] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_243] (rows=605 width=10) - Conds:RS_58._col3=RS_353._col0(Inner),Output:["_col1","_col2"] + TableScan [TS_30] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Reducer 19 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_353] + SHUFFLE [RS_363] PartitionCols:_col0 - Select Operator [SEL_352] (rows=381 width=10) + Select Operator [SEL_362] (rows=381 width=10) Output:["_col0"] - Group By Operator [GBY_351] (rows=381 width=10) + Group By Operator [GBY_361] (rows=381 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 18 [SIMPLE_EDGE] <-Map 21 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_377] + Reduce Output Operator [RS_379] PartitionCols:_col0, _col1 - Group By Operator [GBY_376] (rows=762 width=10) + Group By Operator [GBY_378] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_375] (rows=500 width=10) + Select Operator [SEL_377] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_374] (rows=500 width=10) + Filter Operator [FIL_376] (rows=500 width=10) predicate:value is not null - TableScan [TS_277] (rows=500 width=10) + TableScan [TS_279] (rows=500 width=10) Output:["key","value"] <-Reducer 17 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_369] + Reduce Output Operator [RS_371] PartitionCols:_col0, _col1 - Group By Operator [GBY_368] (rows=762 width=10) + Group By Operator [GBY_370] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_367] (rows=262 width=10) + Select Operator [SEL_369] (rows=262 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_366] (rows=262 width=10) + Group By Operator [GBY_368] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 16 [SIMPLE_EDGE] <-Map 15 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_365] + Reduce Output Operator [RS_367] PartitionCols:_col0, _col1 - Group By Operator [GBY_364] (rows=525 width=10) + Group By Operator [GBY_366] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_363] (rows=25 width=7) + Select Operator [SEL_365] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_362] (rows=25 width=7) + Filter Operator [FIL_364] (rows=25 width=7) predicate:value is not null - TableScan [TS_260] (rows=25 width=7) + TableScan [TS_262] (rows=25 width=7) Output:["key","value"] <-Map 20 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_373] + Reduce Output Operator [RS_375] PartitionCols:_col0, _col1 - Group By Operator [GBY_372] (rows=525 width=10) + Group By Operator [GBY_374] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_371] (rows=500 width=10) + Select Operator [SEL_373] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_370] (rows=500 width=10) + Filter Operator [FIL_372] (rows=500 width=10) predicate:value is not null - TableScan [TS_271] (rows=500 width=10) + TableScan [TS_273] (rows=500 width=10) Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_246] + PartitionCols:_col0, _col1 + Group By Operator [GBY_245] (rows=1210 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_243] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_242] (rows=605 width=10) + Conds:RS_333._col0=RS_348._col3(Inner),Output:["_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_348] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_226] (rows=550 width=10) - Conds:RS_338._col0=RS_350._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_338] + Map Join Operator [MAPJOIN_346] (rows=550 width=10) + Conds:SEL_345._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_341] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_336] - <-Map 14 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_350] - PartitionCols:_col0 - Select Operator [SEL_349] (rows=500 width=10) + Select Operator [SEL_339] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_348] (rows=500 width=10) + Filter Operator [FIL_337] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_29] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Please refer to the previous TableScan [TS_16] + Please refer to the previous Select Operator [SEL_345] + <-Reducer 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_333] + PartitionCols:_col0 + Select Operator [SEL_332] (rows=262 width=10) + Output:["_col0"] + Group By Operator [GBY_331] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_330] + PartitionCols:_col0, _col1 + Group By Operator [GBY_329] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_328] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_327] (rows=25 width=7) + predicate:value is not null + TableScan [TS_236] (rows=25 width=7) + Output:["key","value"] + <-Map 9 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_357] + PartitionCols:_col0, _col1 + Group By Operator [GBY_356] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_355] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_354] (rows=500 width=10) + predicate:value is not null + TableScan [TS_251] (rows=500 width=10) + Output:["key","value"] PREHOOK: query: CREATE TABLE a_n19(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -2734,22 +2730,22 @@ POSTHOOK: Output: default@c_n4 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 9 (BROADCAST_EDGE) -Map 10 <- Union 11 (CONTAINS) -Map 12 <- Union 11 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) Map 14 <- Union 15 (CONTAINS) Map 16 <- Union 15 (CONTAINS) Map 17 <- Union 15 (CONTAINS) -Map 18 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 19 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 20 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Map 21 <- Map 22 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 3 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 11 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Union 3 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 15 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 18 <- Map 12 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 19 <- Map 12 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 20 <- Map 12 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 21 <- Map 12 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 8 <- Union 2 (CONTAINS) +Map 9 <- Map 12 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 15 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 4 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -2759,361 +2755,361 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 4 llap - File Output Operator [FS_82] - Group By Operator [GBY_80] (rows=1 width=880) + Reducer 5 llap + File Output Operator [FS_83] + Group By Operator [GBY_81] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Union 3 [CUSTOM_SIMPLE_EDGE] + <-Union 4 [CUSTOM_SIMPLE_EDGE] <-Map 18 [CONTAINS] llap - File Output Operator [FS_286] + File Output Operator [FS_287] table:{"name:":"default.a_n19"} - Select Operator [SEL_283] (rows=1844 width=10) + Select Operator [SEL_284] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_281] (rows=1844 width=10) - Conds:MAPJOIN_280._col1=RS_396._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_282] (rows=1844 width=10) + Conds:MAPJOIN_281._col1=RS_397._col0(Inner),Output:["_col1","_col4"] <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_396] + BROADCAST [RS_397] PartitionCols:_col0 - Select Operator [SEL_395] (rows=25 width=7) + Select Operator [SEL_396] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_394] (rows=25 width=7) + Filter Operator [FIL_395] (rows=25 width=7) predicate:key is not null - TableScan [TS_63] (rows=25 width=7) + TableScan [TS_64] (rows=25 width=7) default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_280] (rows=1677 width=10) - Conds:SEL_282._col0=RS_367._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_367] + <-Map Join Operator [MAPJOIN_281] (rows=1677 width=10) + Conds:SEL_283._col0=RS_371._col1(Inner),Output:["_col1"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_371] PartitionCols:_col1 - Select Operator [SEL_365] (rows=25 width=7) + Select Operator [SEL_369] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_364] (rows=25 width=7) + Filter Operator [FIL_368] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_11] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_282] (rows=25 width=7) + <-Select Operator [SEL_283] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_279] (rows=25 width=7) + Filter Operator [FIL_280] (rows=25 width=7) predicate:value is not null - TableScan [TS_276] (rows=25 width=7) + TableScan [TS_277] (rows=25 width=7) Output:["value"] - Reduce Output Operator [RS_295] - Group By Operator [GBY_292] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_287] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - File Output Operator [FS_288] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_283] Reduce Output Operator [RS_296] Group By Operator [GBY_293] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_289] (rows=3576 width=10) + Select Operator [SEL_288] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] - File Output Operator [FS_290] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_283] + Please refer to the previous Select Operator [SEL_284] + File Output Operator [FS_289] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_284] Reduce Output Operator [RS_297] Group By Operator [GBY_294] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_291] (rows=3576 width=10) + Select Operator [SEL_290] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_284] + File Output Operator [FS_291] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_284] + Reduce Output Operator [RS_298] + Group By Operator [GBY_295] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_292] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_283] + Please refer to the previous Select Operator [SEL_284] <-Map 19 [CONTAINS] llap - File Output Operator [FS_308] + File Output Operator [FS_309] table:{"name:":"default.a_n19"} - Select Operator [SEL_305] (rows=1844 width=10) + Select Operator [SEL_306] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_303] (rows=1844 width=10) - Conds:MAPJOIN_302._col1=RS_397._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_304] (rows=1844 width=10) + Conds:MAPJOIN_303._col1=RS_398._col0(Inner),Output:["_col1","_col4"] <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_397] + BROADCAST [RS_398] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_302] (rows=1677 width=10) - Conds:SEL_304._col0=RS_368._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_368] + Please refer to the previous Select Operator [SEL_396] + <-Map Join Operator [MAPJOIN_303] (rows=1677 width=10) + Conds:SEL_305._col0=RS_372._col1(Inner),Output:["_col1"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_372] PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_304] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_369] + <-Select Operator [SEL_305] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_301] (rows=500 width=10) + Filter Operator [FIL_302] (rows=500 width=10) predicate:value is not null - TableScan [TS_298] (rows=500 width=10) + TableScan [TS_299] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_317] - Group By Operator [GBY_314] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_309] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - File Output Operator [FS_310] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_305] Reduce Output Operator [RS_318] Group By Operator [GBY_315] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_311] (rows=3576 width=10) + Select Operator [SEL_310] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] - File Output Operator [FS_312] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_305] + Please refer to the previous Select Operator [SEL_306] + File Output Operator [FS_311] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_306] Reduce Output Operator [RS_319] Group By Operator [GBY_316] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_313] (rows=3576 width=10) + Select Operator [SEL_312] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_306] + File Output Operator [FS_313] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_306] + Reduce Output Operator [RS_320] + Group By Operator [GBY_317] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_314] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_305] + Please refer to the previous Select Operator [SEL_306] <-Map 20 [CONTAINS] llap - File Output Operator [FS_330] + File Output Operator [FS_331] table:{"name:":"default.a_n19"} - Select Operator [SEL_327] (rows=1844 width=10) + Select Operator [SEL_328] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_325] (rows=1844 width=10) - Conds:MAPJOIN_324._col1=RS_398._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_326] (rows=1844 width=10) + Conds:MAPJOIN_325._col1=RS_399._col0(Inner),Output:["_col1","_col4"] <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_398] + BROADCAST [RS_399] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_324] (rows=1677 width=10) - Conds:SEL_326._col0=RS_369._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_369] + Please refer to the previous Select Operator [SEL_396] + <-Map Join Operator [MAPJOIN_325] (rows=1677 width=10) + Conds:SEL_327._col0=RS_373._col1(Inner),Output:["_col1"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_373] PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_326] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_369] + <-Select Operator [SEL_327] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_323] (rows=500 width=10) + Filter Operator [FIL_324] (rows=500 width=10) predicate:value is not null - TableScan [TS_320] (rows=500 width=10) + TableScan [TS_321] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_339] - Group By Operator [GBY_336] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_331] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - File Output Operator [FS_332] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_327] Reduce Output Operator [RS_340] Group By Operator [GBY_337] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_333] (rows=3576 width=10) + Select Operator [SEL_332] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] - File Output Operator [FS_334] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_327] + Please refer to the previous Select Operator [SEL_328] + File Output Operator [FS_333] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_328] Reduce Output Operator [RS_341] Group By Operator [GBY_338] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_335] (rows=3576 width=10) + Select Operator [SEL_334] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_327] + Please refer to the previous Select Operator [SEL_328] + File Output Operator [FS_335] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_328] + Reduce Output Operator [RS_342] + Group By Operator [GBY_339] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_336] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_328] <-Map 21 [CONTAINS] llap - File Output Operator [FS_352] + File Output Operator [FS_353] table:{"name:":"default.a_n19"} - Select Operator [SEL_349] (rows=1844 width=10) + Select Operator [SEL_350] (rows=1844 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_347] (rows=1844 width=10) - Conds:MAPJOIN_346._col1=RS_399._col0(Inner),Output:["_col1","_col4"] + Map Join Operator [MAPJOIN_348] (rows=1844 width=10) + Conds:MAPJOIN_347._col1=RS_400._col0(Inner),Output:["_col1","_col4"] <-Map 22 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_399] + BROADCAST [RS_400] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_395] - <-Map Join Operator [MAPJOIN_346] (rows=1677 width=10) - Conds:SEL_348._col0=RS_370._col1(Inner),Output:["_col1"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_370] + Please refer to the previous Select Operator [SEL_396] + <-Map Join Operator [MAPJOIN_347] (rows=1677 width=10) + Conds:SEL_349._col0=RS_374._col1(Inner),Output:["_col1"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_374] PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_348] (rows=500 width=10) + Please refer to the previous Select Operator [SEL_369] + <-Select Operator [SEL_349] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_345] (rows=500 width=10) + Filter Operator [FIL_346] (rows=500 width=10) predicate:value is not null - TableScan [TS_342] (rows=500 width=10) + TableScan [TS_343] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_361] - Group By Operator [GBY_358] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_353] (rows=3576 width=10) - Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - File Output Operator [FS_354] - table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_349] Reduce Output Operator [RS_362] Group By Operator [GBY_359] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_355] (rows=3576 width=10) + Select Operator [SEL_354] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - File Output Operator [FS_356] - table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_349] + Please refer to the previous Select Operator [SEL_350] + File Output Operator [FS_355] + table:{"name:":"default.b_n15"} + Please refer to the previous Select Operator [SEL_350] Reduce Output Operator [RS_363] Group By Operator [GBY_360] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_357] (rows=3576 width=10) + Select Operator [SEL_356] (rows=3576 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_350] + File Output Operator [FS_357] + table:{"name:":"default.c_n4"} + Please refer to the previous Select Operator [SEL_350] + Reduce Output Operator [RS_364] + Group By Operator [GBY_361] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] + Select Operator [SEL_358] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_349] - <-Reducer 2 [CONTAINS] llap - File Output Operator [FS_224] + Please refer to the previous Select Operator [SEL_350] + <-Reducer 11 [CONTAINS] llap + File Output Operator [FS_250] table:{"name:":"default.a_n19"} - Select Operator [SEL_222] (rows=605 width=10) + Select Operator [SEL_248] (rows=1127 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_221] (rows=605 width=10) - Conds:RS_375._col3=Union 11._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_375] + Merge Join Operator [MERGEJOIN_247] (rows=1127 width=10) + Conds:RS_42._col3=Union 15._col0(Inner),Output:["_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] PartitionCols:_col3 - Map Join Operator [MAPJOIN_373] (rows=550 width=10) - Conds:SEL_372._col0=RS_366._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_366] + Merge Join Operator [MERGEJOIN_208] (rows=550 width=10) + Conds:RS_378._col0=RS_385._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_378] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_365] - <-Select Operator [SEL_372] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_371] (rows=500 width=10) - predicate:key is not null - TableScan [TS_0] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_381] + Select Operator [SEL_376] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_375] (rows=500 width=10) + predicate:key is not null + TableScan [TS_8] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_385] + PartitionCols:_col0 + Select Operator [SEL_384] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_383] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_25] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Union 15 [SIMPLE_EDGE] + <-Map 14 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_388] PartitionCols:_col0 - Select Operator [SEL_380] (rows=25 width=7) + Select Operator [SEL_387] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_379] (rows=25 width=7) + Filter Operator [FIL_386] (rows=25 width=7) predicate:value is not null - TableScan [TS_251] (rows=25 width=7) + TableScan [TS_262] (rows=25 width=7) Output:["value"] - <-Map 12 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_384] + <-Map 16 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_391] PartitionCols:_col0 - Select Operator [SEL_383] (rows=500 width=10) + Select Operator [SEL_390] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_382] (rows=500 width=10) + Filter Operator [FIL_389] (rows=500 width=10) predicate:value is not null - TableScan [TS_256] (rows=500 width=10) + TableScan [TS_267] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_233] - Group By Operator [GBY_230] (rows=1 width=880) + <-Map 17 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_394] + PartitionCols:_col0 + Select Operator [SEL_393] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_392] (rows=500 width=10) + predicate:value is not null + TableScan [TS_272] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_259] + Group By Operator [GBY_256] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_225] (rows=3576 width=10) + Select Operator [SEL_251] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - File Output Operator [FS_226] + Please refer to the previous Select Operator [SEL_248] + File Output Operator [FS_252] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_222] - Reduce Output Operator [RS_234] - Group By Operator [GBY_231] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_248] + Reduce Output Operator [RS_260] + Group By Operator [GBY_257] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_227] (rows=3576 width=10) + Select Operator [SEL_253] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - File Output Operator [FS_228] + Please refer to the previous Select Operator [SEL_248] + File Output Operator [FS_254] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_222] - Reduce Output Operator [RS_235] - Group By Operator [GBY_232] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_248] + Reduce Output Operator [RS_261] + Group By Operator [GBY_258] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_229] (rows=3576 width=10) + Select Operator [SEL_255] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_222] - <-Reducer 8 [CONTAINS] llap - File Output Operator [FS_239] + Please refer to the previous Select Operator [SEL_248] + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_230] table:{"name:":"default.a_n19"} - Select Operator [SEL_237] (rows=1127 width=10) + Select Operator [SEL_228] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_236] (rows=1127 width=10) - Conds:RS_41._col3=Union 15._col0(Inner),Output:["_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] + Merge Join Operator [MERGEJOIN_227] (rows=605 width=10) + Conds:Union 2._col0=RS_379._col3(Inner),Output:["_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_379] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_207] (rows=550 width=10) - Conds:RS_374._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_374] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_372] - <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_378] + Map Join Operator [MAPJOIN_377] (rows=550 width=10) + Conds:SEL_376._col0=RS_370._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_370] PartitionCols:_col0 - Select Operator [SEL_377] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_376] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_24] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_387] - PartitionCols:_col0 - Select Operator [SEL_386] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_385] (rows=25 width=7) - predicate:value is not null - TableScan [TS_261] (rows=25 width=7) - Output:["value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_390] + Please refer to the previous Select Operator [SEL_369] + Please refer to the previous Select Operator [SEL_376] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_367] PartitionCols:_col0 - Select Operator [SEL_389] (rows=500 width=10) + Select Operator [SEL_366] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_388] (rows=500 width=10) + Filter Operator [FIL_365] (rows=25 width=7) predicate:value is not null - TableScan [TS_266] (rows=500 width=10) + TableScan [TS_222] (rows=25 width=7) Output:["value"] - <-Map 17 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_393] + <-Map 8 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_382] PartitionCols:_col0 - Select Operator [SEL_392] (rows=500 width=10) + Select Operator [SEL_381] (rows=500 width=10) Output:["_col0"] - Filter Operator [FIL_391] (rows=500 width=10) + Filter Operator [FIL_380] (rows=500 width=10) predicate:value is not null - TableScan [TS_271] (rows=500 width=10) + TableScan [TS_242] (rows=500 width=10) Output:["value"] - Reduce Output Operator [RS_248] - Group By Operator [GBY_245] (rows=1 width=880) + Reduce Output Operator [RS_239] + Group By Operator [GBY_236] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_240] (rows=3576 width=10) + Select Operator [SEL_231] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - File Output Operator [FS_241] + Please refer to the previous Select Operator [SEL_228] + File Output Operator [FS_232] table:{"name:":"default.b_n15"} - Please refer to the previous Select Operator [SEL_237] - Reduce Output Operator [RS_249] - Group By Operator [GBY_246] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_228] + Reduce Output Operator [RS_240] + Group By Operator [GBY_237] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_242] (rows=3576 width=10) + Select Operator [SEL_233] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - File Output Operator [FS_243] + Please refer to the previous Select Operator [SEL_228] + File Output Operator [FS_234] table:{"name:":"default.c_n4"} - Please refer to the previous Select Operator [SEL_237] - Reduce Output Operator [RS_250] - Group By Operator [GBY_247] (rows=1 width=880) + Please refer to the previous Select Operator [SEL_228] + Reduce Output Operator [RS_241] + Group By Operator [GBY_238] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_244] (rows=3576 width=10) + Select Operator [SEL_235] (rows=3576 width=10) Output:["key","value"] - Please refer to the previous Select Operator [SEL_237] - Reducer 5 llap - File Output Operator [FS_91] - Group By Operator [GBY_89] (rows=1 width=880) - Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_228] Reducer 6 llap - File Output Operator [FS_100] - Group By Operator [GBY_98] (rows=1 width=880) + File Output Operator [FS_92] + Group By Operator [GBY_90] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <- Please refer to the previous Union 3 [CUSTOM_SIMPLE_EDGE] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Reducer 7 llap + File Output Operator [FS_101] + Group By Operator [GBY_99] (rows=1 width=880) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Stage-6 Stats Work{} Stage-1 @@ -3172,31 +3168,31 @@ POSTHOOK: Output: default@c_n4 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 12 (BROADCAST_EDGE) -Map 12 <- Map 25 (BROADCAST_EDGE) -Map 13 <- Union 14 (CONTAINS) -Map 16 <- Union 14 (CONTAINS) +Map 1 <- Union 2 (CONTAINS) +Map 12 <- Union 2 (CONTAINS) +Map 13 <- Map 16 (BROADCAST_EDGE) +Map 16 <- Map 35 (BROADCAST_EDGE) Map 18 <- Union 19 (CONTAINS) Map 23 <- Union 19 (CONTAINS) Map 24 <- Union 21 (CONTAINS) -Map 26 <- Union 27 (CONTAINS) -Map 33 <- Union 27 (CONTAINS) -Map 34 <- Union 29 (CONTAINS) -Map 35 <- Union 31 (CONTAINS) -Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 15 <- Union 14 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 3 (CONTAINS) +Map 25 <- Union 26 (CONTAINS) +Map 32 <- Union 26 (CONTAINS) +Map 33 <- Union 28 (CONTAINS) +Map 34 <- Union 30 (CONTAINS) +Reducer 10 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 20 <- Union 19 (SIMPLE_EDGE), Union 21 (CONTAINS) Reducer 22 <- Union 21 (SIMPLE_EDGE) -Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) -Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 32 <- Map 12 (BROADCAST_EDGE), Union 31 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) +Reducer 29 <- Union 28 (SIMPLE_EDGE), Union 30 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) +Reducer 31 <- Map 16 (BROADCAST_EDGE), Union 30 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-5 Stats Work{} @@ -3206,283 +3202,283 @@ Stage-5 Stage-4 Dependency Collection{} Stage-3 - Reducer 7 llap - File Output Operator [FS_130] - Group By Operator [GBY_128] (rows=1 width=880) + Reducer 10 llap + File Output Operator [FS_141] + Group By Operator [GBY_139] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=880) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_138] + Group By Operator [GBY_137] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_125] (rows=544 width=10) + Select Operator [SEL_136] (rows=544 width=10) Output:["key","value"] - Group By Operator [GBY_120] (rows=544 width=10) + Group By Operator [GBY_122] (rows=544 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 32 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_426] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 31 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_428] PartitionCols:_col0, _col1 - Group By Operator [GBY_425] (rows=1089 width=10) + Group By Operator [GBY_427] (rows=1089 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_424] (rows=484 width=10) + Select Operator [SEL_426] (rows=484 width=10) Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_423] (rows=484 width=10) - Conds:RS_360._col3=SEL_422._col0(Inner),Output:["_col1","_col2"] - <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_360] + Map Join Operator [MAPJOIN_425] (rows=484 width=10) + Conds:SEL_424._col0=RS_369._col3(Inner),Output:["_col2","_col3"] + <-Map 16 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_369] PartitionCols:_col3 - Map Join Operator [MAPJOIN_359] (rows=27 width=7) - Conds:SEL_357._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 25 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_353] + Map Join Operator [MAPJOIN_368] (rows=27 width=7) + Conds:SEL_366._col0=RS_362._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 35 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_362] PartitionCols:_col0 - Select Operator [SEL_352] (rows=25 width=7) + Select Operator [SEL_361] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_351] (rows=25 width=7) + Filter Operator [FIL_360] (rows=25 width=7) predicate:(key is not null and value is not null) - TableScan [TS_72] (rows=25 width=7) + TableScan [TS_106] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_357] (rows=25 width=7) + <-Select Operator [SEL_366] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_355] (rows=25 width=7) + Filter Operator [FIL_364] (rows=25 width=7) predicate:key is not null - TableScan [TS_3] (rows=25 width=7) + TableScan [TS_16] (rows=25 width=7) default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_422] (rows=440 width=10) + <-Select Operator [SEL_424] (rows=440 width=10) Output:["_col0"] - Group By Operator [GBY_421] (rows=440 width=10) + Group By Operator [GBY_423] (rows=440 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 31 [SIMPLE_EDGE] - <-Map 35 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_438] + <-Union 30 [SIMPLE_EDGE] + <-Map 34 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_440] PartitionCols:_col0, _col1 - Group By Operator [GBY_437] (rows=881 width=10) + Group By Operator [GBY_439] (rows=881 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_436] (rows=500 width=10) + Select Operator [SEL_438] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_435] (rows=500 width=10) + Filter Operator [FIL_437] (rows=500 width=10) predicate:value is not null - TableScan [TS_345] (rows=500 width=10) + TableScan [TS_347] (rows=500 width=10) Output:["key","value"] - <-Reducer 30 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_420] + <-Reducer 29 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_422] PartitionCols:_col0, _col1 - Group By Operator [GBY_419] (rows=881 width=10) + Group By Operator [GBY_421] (rows=881 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_418] (rows=381 width=10) + Select Operator [SEL_420] (rows=381 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_417] (rows=381 width=10) + Group By Operator [GBY_419] (rows=381 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_434] + <-Union 28 [SIMPLE_EDGE] + <-Map 33 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_436] PartitionCols:_col0, _col1 - Group By Operator [GBY_433] (rows=762 width=10) + Group By Operator [GBY_435] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_432] (rows=500 width=10) + Select Operator [SEL_434] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_431] (rows=500 width=10) + Filter Operator [FIL_433] (rows=500 width=10) predicate:value is not null - TableScan [TS_339] (rows=500 width=10) + TableScan [TS_341] (rows=500 width=10) Output:["key","value"] - <-Reducer 28 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_416] + <-Reducer 27 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_418] PartitionCols:_col0, _col1 - Group By Operator [GBY_415] (rows=762 width=10) + Group By Operator [GBY_417] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_414] (rows=262 width=10) + Select Operator [SEL_416] (rows=262 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_413] (rows=262 width=10) + Group By Operator [GBY_415] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] - <-Map 26 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_412] + <-Union 26 [SIMPLE_EDGE] + <-Map 25 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_414] PartitionCols:_col0, _col1 - Group By Operator [GBY_411] (rows=525 width=10) + Group By Operator [GBY_413] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_410] (rows=25 width=7) + Select Operator [SEL_412] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_409] (rows=25 width=7) + Filter Operator [FIL_411] (rows=25 width=7) predicate:value is not null - TableScan [TS_309] (rows=25 width=7) + TableScan [TS_311] (rows=25 width=7) Output:["key","value"] - <-Map 33 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_430] + <-Map 32 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_432] PartitionCols:_col0, _col1 - Group By Operator [GBY_429] (rows=525 width=10) + Group By Operator [GBY_431] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_428] (rows=500 width=10) + Select Operator [SEL_430] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_427] (rows=500 width=10) + Filter Operator [FIL_429] (rows=500 width=10) predicate:value is not null - TableScan [TS_333] (rows=500 width=10) + TableScan [TS_335] (rows=500 width=10) Output:["key","value"] - <-Reducer 4 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_371] + <-Reducer 6 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_377] PartitionCols:_col0, _col1 - Group By Operator [GBY_370] (rows=1089 width=10) + Group By Operator [GBY_376] (rows=1089 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_369] (rows=605 width=10) + Group By Operator [GBY_375] (rows=605 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] llap - Reduce Output Operator [RS_273] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] llap + Reduce Output Operator [RS_287] PartitionCols:_col0, _col1 - Group By Operator [GBY_272] (rows=1210 width=10) + Group By Operator [GBY_286] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_270] (rows=605 width=10) + Select Operator [SEL_284] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_269] (rows=605 width=10) - Conds:RS_58._col3=RS_384._col0(Inner),Output:["_col1","_col2"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] + Merge Join Operator [MERGEJOIN_283] (rows=605 width=10) + Conds:RS_59._col3=RS_394._col0(Inner),Output:["_col1","_col2"] + <-Reducer 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_59] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_252] (rows=550 width=10) - Conds:RS_364._col0=RS_381._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_364] + Merge Join Operator [MERGEJOIN_254] (rows=550 width=10) + Conds:RS_373._col0=RS_391._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_373] PartitionCols:_col0 - Select Operator [SEL_362] (rows=500 width=10) + Select Operator [SEL_371] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_361] (rows=500 width=10) + Filter Operator [FIL_370] (rows=500 width=10) predicate:key is not null - TableScan [TS_0] (rows=500 width=10) + TableScan [TS_13] (rows=500 width=10) default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Map 17 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_381] + SHUFFLE [RS_391] PartitionCols:_col0 - Select Operator [SEL_380] (rows=500 width=10) + Select Operator [SEL_390] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_379] (rows=500 width=10) + Filter Operator [FIL_389] (rows=500 width=10) predicate:(key is not null and value is not null) - TableScan [TS_29] (rows=500 width=10) + TableScan [TS_30] (rows=500 width=10) default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Reducer 22 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_384] + SHUFFLE [RS_394] PartitionCols:_col0 - Select Operator [SEL_383] (rows=381 width=10) + Select Operator [SEL_393] (rows=381 width=10) Output:["_col0"] - Group By Operator [GBY_382] (rows=381 width=10) + Group By Operator [GBY_392] (rows=381 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 21 [SIMPLE_EDGE] <-Map 24 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_408] + Reduce Output Operator [RS_410] PartitionCols:_col0, _col1 - Group By Operator [GBY_407] (rows=762 width=10) + Group By Operator [GBY_409] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_406] (rows=500 width=10) + Select Operator [SEL_408] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_405] (rows=500 width=10) + Filter Operator [FIL_407] (rows=500 width=10) predicate:value is not null - TableScan [TS_303] (rows=500 width=10) + TableScan [TS_305] (rows=500 width=10) Output:["key","value"] <-Reducer 20 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_400] + Reduce Output Operator [RS_402] PartitionCols:_col0, _col1 - Group By Operator [GBY_399] (rows=762 width=10) + Group By Operator [GBY_401] (rows=762 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_398] (rows=262 width=10) + Select Operator [SEL_400] (rows=262 width=10) Output:["_col0","_col1"] - Group By Operator [GBY_397] (rows=262 width=10) + Group By Operator [GBY_399] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Union 19 [SIMPLE_EDGE] <-Map 18 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_396] + Reduce Output Operator [RS_398] PartitionCols:_col0, _col1 - Group By Operator [GBY_395] (rows=525 width=10) + Group By Operator [GBY_397] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_394] (rows=25 width=7) + Select Operator [SEL_396] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_393] (rows=25 width=7) + Filter Operator [FIL_395] (rows=25 width=7) predicate:value is not null - TableScan [TS_286] (rows=25 width=7) + TableScan [TS_288] (rows=25 width=7) Output:["key","value"] <-Map 23 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_404] + Reduce Output Operator [RS_406] PartitionCols:_col0, _col1 - Group By Operator [GBY_403] (rows=525 width=10) + Group By Operator [GBY_405] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_402] (rows=500 width=10) + Select Operator [SEL_404] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_401] (rows=500 width=10) + Filter Operator [FIL_403] (rows=500 width=10) predicate:value is not null - TableScan [TS_297] (rows=500 width=10) + TableScan [TS_299] (rows=500 width=10) Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_264] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_272] PartitionCols:_col0, _col1 - Group By Operator [GBY_263] (rows=1210 width=10) + Group By Operator [GBY_271] (rows=1210 width=10) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_261] (rows=605 width=10) + Select Operator [SEL_269] (rows=605 width=10) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_260] (rows=605 width=10) - Conds:RS_365._col3=RS_368._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_365] + Merge Join Operator [MERGEJOIN_268] (rows=605 width=10) + Conds:RS_359._col0=RS_374._col3(Inner),Output:["_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_374] PartitionCols:_col3 - Map Join Operator [MAPJOIN_363] (rows=550 width=10) - Conds:SEL_362._col0=RS_358._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_358] + Map Join Operator [MAPJOIN_372] (rows=550 width=10) + Conds:SEL_371._col0=RS_367._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 16 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_367] PartitionCols:_col0 - Select Operator [SEL_356] (rows=25 width=7) + Select Operator [SEL_365] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_354] (rows=25 width=7) + Filter Operator [FIL_363] (rows=25 width=7) predicate:(key is not null and value is not null) - Please refer to the previous TableScan [TS_3] - Please refer to the previous Select Operator [SEL_362] - <-Reducer 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_368] + Please refer to the previous TableScan [TS_16] + Please refer to the previous Select Operator [SEL_371] + <-Reducer 3 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_359] PartitionCols:_col0 - Select Operator [SEL_367] (rows=262 width=10) + Select Operator [SEL_358] (rows=262 width=10) Output:["_col0"] - Group By Operator [GBY_366] (rows=262 width=10) + Group By Operator [GBY_357] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_388] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_356] PartitionCols:_col0, _col1 - Group By Operator [GBY_387] (rows=525 width=10) + Group By Operator [GBY_355] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_386] (rows=25 width=7) + Select Operator [SEL_354] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_385] (rows=25 width=7) + Filter Operator [FIL_353] (rows=25 width=7) predicate:value is not null - TableScan [TS_274] (rows=25 width=7) + TableScan [TS_262] (rows=25 width=7) Output:["key","value"] - <-Map 16 [CONTAINS] vectorized, llap - Reduce Output Operator [RS_392] + <-Map 12 [CONTAINS] vectorized, llap + Reduce Output Operator [RS_388] PartitionCols:_col0, _col1 - Group By Operator [GBY_391] (rows=525 width=10) + Group By Operator [GBY_387] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_390] (rows=500 width=10) + Select Operator [SEL_386] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_389] (rows=500 width=10) + Filter Operator [FIL_385] (rows=500 width=10) predicate:value is not null - TableScan [TS_280] (rows=500 width=10) + TableScan [TS_277] (rows=500 width=10) Output:["key","value"] - Reducer 8 llap - File Output Operator [FS_139] - Group By Operator [GBY_137] (rows=1 width=880) + Reducer 11 llap + File Output Operator [FS_150] + Group By Operator [GBY_148] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_136] - Group By Operator [GBY_135] (rows=1 width=880) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_147] + Group By Operator [GBY_146] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_134] (rows=544 width=10) + Select Operator [SEL_145] (rows=544 width=10) Output:["key","value"] - Please refer to the previous Group By Operator [GBY_120] + Please refer to the previous Group By Operator [GBY_122] Reducer 9 llap - File Output Operator [FS_148] - Group By Operator [GBY_146] (rows=1 width=880) + File Output Operator [FS_132] + Group By Operator [GBY_130] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_145] - Group By Operator [GBY_144] (rows=1 width=880) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_129] + Group By Operator [GBY_128] (rows=1 width=880) Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"] - Select Operator [SEL_143] (rows=544 width=10) + Select Operator [SEL_127] (rows=544 width=10) Output:["key","value"] - Please refer to the previous Group By Operator [GBY_120] + Please refer to the previous Group By Operator [GBY_122] Stage-6 Stats Work{} Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index 40ac1f2749..8ee371c111 100644 --- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -53,31 +53,31 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: f - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + alias: g + filterExpr: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (value <> '') (type: boolean) + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 auto parallelism: true Execution mode: vectorized, llap @@ -134,30 +134,29 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [f] - Map 4 + /filter_join_breaktask/ds=2008-04-08 [g] + Map 3 Map Operator Tree: TableScan - alias: m - filterExpr: (key is not null and (value <> '')) (type: boolean) - Statistics: Num rows: 25 Data size: 2289 Basic stats: COMPLETE Column stats: COMPLETE + alias: f + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (value <> '')) (type: boolean) - Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col1 (type: string) + Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs @@ -213,29 +212,30 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [m] + /filter_join_breaktask/ds=2008-04-08 [f] Map 5 Map Operator Tree: TableScan - alias: g - filterExpr: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE + alias: m + filterExpr: (key is not null and (value <> '')) (type: boolean) + Statistics: Num rows: 25 Data size: 2289 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and (value <> '')) (type: boolean) + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 + value expressions: _col1 (type: string) auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs @@ -291,7 +291,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] + /filter_join_breaktask/ds=2008-04-08 [m] Reducer 2 Execution mode: llap Needs Tagging: false @@ -300,35 +300,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Position of Big Table: 1 - Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: int) - auto parallelism: true - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3 - Position of Big Table: 1 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1 + Position of Big Table: 0 Statistics: Num rows: 19 Data size: 1747 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 19 Data size: 1747 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -353,6 +331,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Position of Big Table: 1 + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col0 (type: int) + auto parallelism: true Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out b/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out index 3c944483fb..a66b96eb84 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out @@ -213,108 +213,110 @@ POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 3 (BROADCAST_EDGE) -Map 4 <- Map 1 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) +Map 1 <- Map 4 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) +Map 4 <- Reducer 6 (BROADCAST_EDGE) Map 7 <- Reducer 9 (BROADCAST_EDGE) -Reducer 3 <- Map 2 (SIMPLE_EDGE) -Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized, llap - File Output Operator [FS_145] - Limit [LIM_144] (rows=2 width=8) + Reducer 3 vectorized, llap + File Output Operator [FS_146] + Limit [LIM_145] (rows=2 width=8) Number of rows:100 - Select Operator [SEL_143] (rows=2 width=8) + Select Operator [SEL_144] (rows=2 width=8) Output:["_col0"] - <-Reducer 5 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_142] - Select Operator [SEL_141] (rows=2 width=8) + <-Reducer 2 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_143] + Select Operator [SEL_142] (rows=2 width=8) Output:["_col0"] - Top N Key Operator [TNK_140] (rows=2 width=12) + Top N Key Operator [TNK_141] (rows=2 width=12) keys:_col1,top n:100 - Group By Operator [GBY_139] (rows=2 width=12) + Group By Operator [GBY_140] (rows=2 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_138] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_139] PartitionCols:_col0 - Group By Operator [GBY_137] (rows=2 width=12) + Group By Operator [GBY_138] (rows=2 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col8 - Map Join Operator [MAPJOIN_136] (rows=5185194 width=4) - Conds:MAPJOIN_135._col6=RS_129._col0(Inner),Output:["_col8"] + Map Join Operator [MAPJOIN_137] (rows=5185194 width=4) + Conds:MAPJOIN_136._col1=RS_130._col0(Inner),Output:["_col8"] <-Map 7 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_129] + BROADCAST [RS_130] PartitionCols:_col0 - Map Join Operator [MAPJOIN_128] (rows=28 width=8) - Conds:SEL_127._col1=RS_125._col0(Inner),Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_129] (rows=28 width=8) + Conds:SEL_128._col1=RS_126._col0(Inner),Output:["_col0","_col1"] <-Reducer 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_125] + BROADCAST [RS_126] PartitionCols:_col0 - Group By Operator [GBY_124] (rows=2 width=4) + Group By Operator [GBY_125] (rows=2 width=4) Output:["_col0"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_123] + SHUFFLE [RS_124] PartitionCols:_col0 - Group By Operator [GBY_122] (rows=2 width=4) + Group By Operator [GBY_123] (rows=2 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_121] (rows=4 width=12) + Select Operator [SEL_122] (rows=4 width=12) Output:["d_month_seq"] - Filter Operator [FIL_120] (rows=4 width=12) + Filter Operator [FIL_121] (rows=4 width=12) predicate:((d_year = 2000) and ((d_year * d_moy) > 200000) and (d_moy = 2) and d_month_seq is not null) - TableScan [TS_17] (rows=28 width=12) + TableScan [TS_21] (rows=28 width=12) default@x1_date_dim,x1_date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Select Operator [SEL_127] (rows=28 width=8) + <-Select Operator [SEL_128] (rows=28 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_126] (rows=28 width=8) + Filter Operator [FIL_127] (rows=28 width=8) predicate:(d_date_sk is not null and d_month_seq is not null) - TableScan [TS_14] (rows=28 width=8) + TableScan [TS_18] (rows=28 width=8) default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - Dynamic Partitioning Event Operator [EVENT_132] (rows=1 width=4) - Group By Operator [GBY_131] (rows=1 width=4) + Dynamic Partitioning Event Operator [EVENT_133] (rows=1 width=4) + Group By Operator [GBY_132] (rows=1 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_130] (rows=28 width=4) + Select Operator [SEL_131] (rows=28 width=4) Output:["_col0"] - Please refer to the previous Map Join Operator [MAPJOIN_128] - <-Map Join Operator [MAPJOIN_135] (rows=370371 width=4) - Conds:RS_31._col0=SEL_134._col0(Inner),Output:["_col6"] - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_31] + Please refer to the previous Map Join Operator [MAPJOIN_129] + <-Map Join Operator [MAPJOIN_136] (rows=370371 width=4) + Conds:SEL_135._col0=RS_33._col0(Inner),Output:["_col1"] + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_33] PartitionCols:_col0 - Map Join Operator [MAPJOIN_103] (rows=6 width=228) - Conds:SEL_2._col1=RS_117._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)} - <-Reducer 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_117] - PartitionCols:_col1 - Select Operator [SEL_116] (rows=1 width=197) - Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=1 width=197) - predicate:_col1 is not null - Group By Operator [GBY_114] (rows=1 width=197) - Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 - <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_113] - PartitionCols:_col0 - Group By Operator [GBY_112] (rows=1 width=197) - Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category - Filter Operator [FIL_111] (rows=18 width=197) - predicate:i_category is not null - TableScan [TS_3] (rows=18 width=197) - default@x1_item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_current_price"] - <-Select Operator [SEL_2] (rows=18 width=201) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_58] (rows=18 width=201) - predicate:(i_item_sk is not null and i_category is not null and i_current_price is not null) - TableScan [TS_0] (rows=18 width=201) - default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"] - <-Select Operator [SEL_134] (rows=123457 width=8) + Select Operator [SEL_17] (rows=6 width=228) + Output:["_col0"] + Map Join Operator [MAPJOIN_104] (rows=6 width=228) + Conds:SEL_5._col1=RS_118._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)} + <-Reducer 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_118] + PartitionCols:_col1 + Select Operator [SEL_117] (rows=1 width=197) + Output:["_col0","_col1"] + Filter Operator [FIL_116] (rows=1 width=197) + predicate:_col1 is not null + Group By Operator [GBY_115] (rows=1 width=197) + Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 + <-Map 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_114] + PartitionCols:_col0 + Group By Operator [GBY_113] (rows=1 width=197) + Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category + Filter Operator [FIL_112] (rows=18 width=197) + predicate:i_category is not null + TableScan [TS_6] (rows=18 width=197) + default@x1_item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_current_price"] + <-Select Operator [SEL_5] (rows=18 width=201) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_60] (rows=18 width=201) + predicate:(i_item_sk is not null and i_category is not null and i_current_price is not null) + TableScan [TS_3] (rows=18 width=201) + default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"] + <-Select Operator [SEL_135] (rows=123457 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_133] (rows=123457 width=8) + Filter Operator [FIL_134] (rows=123457 width=8) predicate:ss_item_sk is not null - TableScan [TS_11] (rows=123457 width=8) + TableScan [TS_0] (rows=123457 width=8) default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] PREHOOK: query: select count(*) cnt diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 8072a8fd88..efa2919cda 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -57,12 +57,149 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: z + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 39 + keys: + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1_n21 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1_n21 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n21 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + minReductionHashAggr: 0.984127 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 Map Operator Tree: TableScan alias: y @@ -80,13 +217,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 2 => 25 + Estimated key counts: Map 4 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 39 Data size: 10374 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -95,7 +232,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 39 Data size: 10374 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 + tag: 1 value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: true Execution mode: vectorized, llap @@ -153,7 +290,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 2 + Map 4 Map Operator Tree: TableScan alias: x @@ -232,210 +369,73 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 - Map Operator Tree: - TableScan - alias: z - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 39 - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 + MultiFileSpray: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string + name default.dest_j1_n21 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1_n21 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### - name default.dest_j1_n21 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1_n21 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n21 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - minReductionHashAggr: 0.984127 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 4 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types struct:struct:struct - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1_n21 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1_n21 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n21 - - Stage: Stage-3 - Stats Work - Basic Stats Work: + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n21 + + Stage: Stage-3 + Stats Work + Basic Stats Work: #### A masked pattern was here #### Column Stats Desc: Columns: key, value, val2 @@ -603,56 +603,101 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Map 4 <- Map 3 (BROADCAST_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + alias: w + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 2 => 25 + Estimated key counts: Map 3 => 61 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + 1 _col3 (type: string) + outputColumnNames: _col2, _col3, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col1 (type: string), _col3 (type: string) - auto parallelism: true - Execution mode: vectorized, llap + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: string), _col6 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1_n21 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1_n21 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n21 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + minReductionHashAggr: 0.989899 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -664,14 +709,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -686,44 +731,56 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Map 2 + /src [w] + Map 3 Map Operator Tree: TableScan - alias: z + alias: y filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: true + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 4 => 39 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col5 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: string) + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -731,7 +788,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -743,14 +800,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -765,55 +822,55 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [z] - Map 3 + /src [y] + Map 4 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + alias: x + filterExpr: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 1 => 39 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5 + outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Position of Big Table: 0 + Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 39 Data size: 10296 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col1 (type: string), _col3 (type: string) auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs @@ -822,7 +879,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -834,14 +891,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -856,109 +913,52 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 4 + /src1 [x] + Map 5 Map Operator Tree: TableScan - alias: w - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + alias: z + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 61 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col5 - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1_n21 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1_n21 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n21 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - minReductionHashAggr: 0.989899 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true + Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -970,14 +970,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -992,21 +992,21 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [w] - Reducer 5 + /src1 [z] + Reducer 2 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1237,50 +1237,187 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: z + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 2 => 25 + Estimated key counts: Map 3 => 39 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2_n1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j2_n1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + minReductionHashAggr: 0.984127 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 4 => 25 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: no inputs + Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### @@ -1333,7 +1470,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 2 + Map 4 Map Operator Tree: TableScan alias: x @@ -1412,144 +1549,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 - Map Operator Tree: - TableScan - alias: z - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 39 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2_n1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j2_n1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - minReductionHashAggr: 0.984127 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 4 + Reducer 2 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1781,13 +1781,150 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### - Edges: - Map 4 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Edges: + Map 1 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Reducer 4 => 64 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 4 + Position of Big Table: 0 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### - Vertices: - Map 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2_n1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2_n1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 Map Operator Tree: TableScan alias: x @@ -1866,7 +2003,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 5 Map Operator Tree: TableScan alias: y @@ -1927,183 +2064,24 @@ STAGE PLANS: bucketing_version 2 column.name.delimiter , columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [y] - Map 4 - Map Operator Tree: - TableScan - alias: z - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Reducer 2 => 64 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Position of Big Table: 1 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2_n1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2_n1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Execution mode: llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: hr=11 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Position of Big Table: 0 - Statistics: Num rows: 64 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 64 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: true - Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -2134,6 +2112,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Position of Big Table: 0 + Statistics: Num rows: 64 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 64 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: true Stage: Stage-2 Dependency Collection @@ -2338,12 +2338,64 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: x + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + minReductionHashAggr: 0.984127 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: y @@ -2364,7 +2416,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -2375,7 +2427,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: x @@ -2397,59 +2449,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: x - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - minReductionHashAggr: 0.984127 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2635,12 +2635,64 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: y + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + minReductionHashAggr: 0.984127 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: y @@ -2661,7 +2713,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -2672,7 +2724,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: x @@ -2694,59 +2746,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: y - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - minReductionHashAggr: 0.984127 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Execution mode: llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out b/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out index 4e90697fd6..0fe826889d 100644 --- a/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out @@ -473,12 +473,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: employee_part_n1 + filterExpr: employeeid is not null (type: boolean) + Statistics: Num rows: 8 Data size: 28 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: employeeid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 21 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: employeeid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 21 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 21 Basic stats: PARTIAL Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: lineitem_nostats @@ -500,7 +521,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: supplier_nostats @@ -521,45 +542,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: employee_part_n1 - filterExpr: employeeid is not null (type: boolean) - Statistics: Num rows: 8 Data size: 28 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: employeeid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 21 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: employeeid (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 21 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 21 Basic stats: PARTIAL Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -580,7 +563,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -595,6 +578,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out b/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out index ba1860faf2..8c8e4e9f98 100644 --- a/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_join_transpose.q.out @@ -432,26 +432,22 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 5 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1426,27 +1422,23 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Right Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 5 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 1 - Offset of rows: 1 + Limit + Number of rows: 1 + Offset of rows: 1 + Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 3 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out index 8f4c9bf4ab..44ff42330e 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_mapjoin.q.out @@ -37,10 +37,88 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: no inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -58,49 +136,53 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 2 => 25 + Estimated key counts: Map 3 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 3 => 500 + Estimated key counts: Map 1 => 500 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 3 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -307,7 +389,7 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] - Map 2 + Map 3 Map Operator Tree: TableScan alias: src1 @@ -385,84 +467,6 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [src1] - Map 3 - Map Operator Tree: - TableScan - alias: src - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] Stage: Stage-0 Fetch Operator @@ -501,10 +505,31 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src + filterExpr: (value > 'val_450') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value > 'val_450') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -525,28 +550,32 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 732 Data size: 7782 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 3 + 0 Map 1 Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: src1 @@ -567,27 +596,6 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - filterExpr: (value > 'val_450') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value > 'val_450') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out index 7fe1fae59d..602a89b7a3 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out @@ -895,12 +895,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 7 <- Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) + Map 7 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: dependents_n4 + filterExpr: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: name is not null (type: boolean) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: empid (type: int), name (type: varchar(256)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: varchar(256)) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: varchar(256)) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 Map Operator Tree: TableScan alias: depts_n6 @@ -922,7 +944,7 @@ STAGE PLANS: value expressions: _col1 (type: varchar(256)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 6 Map Operator Tree: TableScan alias: emps_n8 @@ -943,28 +965,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 - Map Operator Tree: - TableScan - alias: dependents_n4 - filterExpr: name is not null (type: boolean) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: name is not null (type: boolean) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: empid (type: int), name (type: varchar(256)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: varchar(256)) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: varchar(256)) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan @@ -984,6 +984,24 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: varchar(256)) + 1 _col1 (type: varchar(256)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1000,30 +1018,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: varchar(256)) Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: varchar(256)) - 1 _col1 (type: varchar(256)) - outputColumnNames: _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col3 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 4 - Vertex: Union 4 + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index cb47dd45ab..19e727e269 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -2024,16 +2024,69 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 4 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and key BETWEEN DynamicValue(RS_14_a_key_min) AND DynamicValue(RS_14_a_key_max) and in_bloom_filter(key, DynamicValue(RS_14_a_key_bloom_filter))) (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue) + predicate: (key is not null and key BETWEEN DynamicValue(RS_14_a_key_min) AND DynamicValue(RS_14_a_key_max) and in_bloom_filter(key, DynamicValue(RS_14_a_key_bloom_filter))) (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [] + Map 4 Map Operator Tree: TableScan alias: a @@ -2088,7 +2141,7 @@ STAGE PLANS: partitionColumnCount: 1 partitionColumns: ds:string scratchColumnTypeNames: [] - Map 6 + Map 7 Map Operator Tree: TableScan alias: c @@ -2171,95 +2224,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 8 - Map Operator Tree: - TableScan - alias: b - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter))) (type: boolean) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue) - predicate: (key is not null and key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter))) (type: boolean) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 1 - partitionColumns: ds:string - scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) - minReductionHashAggr: 0.974359 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2283,7 +2248,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2322,6 +2287,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) + minReductionHashAggr: 0.974359 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2359,7 +2359,7 @@ STAGE PLANS: valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3159,16 +3159,69 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 4 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and key BETWEEN DynamicValue(RS_14_a_key_min) AND DynamicValue(RS_14_a_key_max) and in_bloom_filter(key, DynamicValue(RS_14_a_key_bloom_filter))) (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue) + predicate: (key is not null and key BETWEEN DynamicValue(RS_14_a_key_min) AND DynamicValue(RS_14_a_key_max) and in_bloom_filter(key, DynamicValue(RS_14_a_key_bloom_filter))) (type: boolean) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 1 + partitionColumns: ds:string + scratchColumnTypeNames: [] + Map 4 Map Operator Tree: TableScan alias: a @@ -3223,7 +3276,7 @@ STAGE PLANS: partitionColumnCount: 1 partitionColumns: ds:string scratchColumnTypeNames: [] - Map 6 + Map 7 Map Operator Tree: TableScan alias: c @@ -3306,95 +3359,7 @@ STAGE PLANS: dataColumns: key:string, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 8 - Map Operator Tree: - TableScan - alias: b - filterExpr: (key is not null and key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter))) (type: boolean) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ds:string, 3:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), FilterLongColumnBetweenDynamicValue(col 0:int, left 0, right 0), VectorInBloomFilterColDynamicValue) - predicate: (key is not null and key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter))) (type: boolean) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 1 - partitionColumns: ds:string - scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) - minReductionHashAggr: 0.974359 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -3418,7 +3383,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3457,6 +3422,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=25) + minReductionHashAggr: 0.974359 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -3494,7 +3494,7 @@ STAGE PLANS: valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true diff --git a/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out b/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out index c29edc3bcf..4f1b7a6dac 100644 --- a/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out +++ b/ql/src/test/results/clientpositive/llap/q93_with_constraints.q.out @@ -229,13 +229,35 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 70041069312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 Map Operator Tree: TableScan alias: store_returns @@ -257,7 +279,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: reason @@ -278,47 +300,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 913 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 70041069312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col2 (type: int) - Statistics: Num rows: 518396071 Data size: 63036962320 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col2 (type: int) - Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -327,10 +309,10 @@ STAGE PLANS: keys: 0 _col0 (type: int), _col2 (type: int) 1 _col0 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col6, _col8, _col9 + outputColumnNames: _col1, _col3, _col4, _col8 Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), CASE WHEN (_col3 is not null) THEN ((CAST( (_col8 - _col3) AS decimal(10,0)) * _col9)) ELSE ((CAST( _col8 AS decimal(10,0)) * _col9)) END (type: decimal(18,2)) + expressions: _col1 (type: int), CASE WHEN (_col8 is not null) THEN ((CAST( (_col3 - _col8) AS decimal(10,0)) * _col4)) ELSE ((CAST( _col3 AS decimal(10,0)) * _col4)) END (type: decimal(18,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -347,7 +329,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 570235690 Data size: 69340660054 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(28,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -368,7 +350,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 285117845 Data size: 34670330027 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -385,6 +367,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 60182753 Data size: 914777879 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/retry_failure_reorder.q.out b/ql/src/test/results/clientpositive/llap/retry_failure_reorder.q.out index 82c194838c..54b174fa55 100644 --- a/ql/src/test/results/clientpositive/llap/retry_failure_reorder.q.out +++ b/ql/src/test/results/clientpositive/llap/retry_failure_reorder.q.out @@ -259,59 +259,59 @@ PREHOOK: Input: default@tw Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 vectorized, llap - File Output Operator [FS_63] - Select Operator [SEL_62] (rows=1 width=4) + Reducer 3 vectorized, llap + File Output Operator [FS_64] + Select Operator [SEL_63] (rows=1 width=4) Output:["_col0"] - Group By Operator [GBY_61] (rows=1 width=8) + Group By Operator [GBY_62] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_18] - Group By Operator [GBY_17] (rows=1 width=8) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_19] + Group By Operator [GBY_18] (rows=1 width=8) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_15] (rows=87 width=12) + Select Operator [SEL_16] (rows=87 width=12) Output:["_col0"] - Merge Join Operator [MERGEJOIN_51] (rows=87 width=12) - Conds:RS_12._col0=RS_60._col0(Inner),Output:["_col2","_col4","_col6"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_60] + Merge Join Operator [MERGEJOIN_52] (rows=87 width=12) + Conds:RS_55._col0=RS_14._col0(Inner),Output:["_col1","_col4","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_55] PartitionCols:_col0 - Select Operator [SEL_59] (runtime: rows=25 width=8) + Select Operator [SEL_54] (runtime: rows=25 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_58] (rows=23 width=8) + Filter Operator [FIL_53] (rows=23 width=8) predicate:((v > 3) and id_uv is not null) - TableScan [TS_6] (runtime: rows=30 width=8) + TableScan [TS_0] (runtime: rows=30 width=8) default@tv,tv,Tbl:COMPLETE,Col:COMPLETE,Output:["id_uv","v"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_50] (rows=21 width=12) - Conds:RS_54._col1=RS_57._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_54] + Merge Join Operator [MERGEJOIN_51] (rows=21 width=12) + Conds:RS_58._col1=RS_61._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 4 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_58] PartitionCols:_col1 - Select Operator [SEL_53] (runtime: rows=30 width=12) + Select Operator [SEL_57] (runtime: rows=30 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_52] (rows=35 width=12) + Filter Operator [FIL_56] (rows=35 width=12) predicate:((u > 1) and id_uv is not null and id_uw is not null) - TableScan [TS_0] (runtime: rows=35 width=12) + TableScan [TS_3] (runtime: rows=35 width=12) default@tu,tu,Tbl:COMPLETE,Col:COMPLETE,Output:["id_uv","id_uw","u"] - <-Map 5 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_57] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_56] (runtime: rows=5 width=8) + Select Operator [SEL_60] (runtime: rows=5 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_55] (rows=6 width=8) + Filter Operator [FIL_59] (rows=6 width=8) predicate:((w > 9) and id_uw is not null) - TableScan [TS_3] (runtime: rows=50 width=8) + TableScan [TS_6] (runtime: rows=50 width=8) default@tw,tw,Tbl:COMPLETE,Col:COMPLETE,Output:["id_uw","w"] PREHOOK: query: select assert_true_oom(1 > sum(u*v*w)) from tu @@ -348,38 +348,38 @@ Stage-1 FILE SYSTEM COUNTERS: Stage-1 HIVE COUNTERS: CREATED_FILES: 1 DESERIALIZE_ERRORS: 0 - RECORDS_IN_Map_1: 35 - RECORDS_IN_Map_5: 50 - RECORDS_IN_Map_6: 30 + RECORDS_IN_Map_1: 30 + RECORDS_IN_Map_4: 35 + RECORDS_IN_Map_6: 50 RECORDS_OUT_0: 1 - RECORDS_OUT_INTERMEDIATE_Map_1: 30 - RECORDS_OUT_INTERMEDIATE_Map_5: 5 - RECORDS_OUT_INTERMEDIATE_Map_6: 25 - RECORDS_OUT_INTERMEDIATE_Reducer_2: 25 - RECORDS_OUT_INTERMEDIATE_Reducer_3: 1 - RECORDS_OUT_INTERMEDIATE_Reducer_4: 0 - RECORDS_OUT_OPERATOR_FIL_52: 30 - RECORDS_OUT_OPERATOR_FIL_55: 5 - RECORDS_OUT_OPERATOR_FIL_58: 25 - RECORDS_OUT_OPERATOR_FS_63: 1 - RECORDS_OUT_OPERATOR_GBY_17: 1 - RECORDS_OUT_OPERATOR_GBY_61: 1 + RECORDS_OUT_INTERMEDIATE_Map_1: 25 + RECORDS_OUT_INTERMEDIATE_Map_4: 30 + RECORDS_OUT_INTERMEDIATE_Map_6: 5 + RECORDS_OUT_INTERMEDIATE_Reducer_2: 1 + RECORDS_OUT_INTERMEDIATE_Reducer_3: 0 + RECORDS_OUT_INTERMEDIATE_Reducer_5: 25 + RECORDS_OUT_OPERATOR_FIL_53: 25 + RECORDS_OUT_OPERATOR_FIL_56: 30 + RECORDS_OUT_OPERATOR_FIL_59: 5 + RECORDS_OUT_OPERATOR_FS_64: 1 + RECORDS_OUT_OPERATOR_GBY_18: 1 + RECORDS_OUT_OPERATOR_GBY_62: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 - RECORDS_OUT_OPERATOR_MERGEJOIN_50: 25 - RECORDS_OUT_OPERATOR_MERGEJOIN_51: 125 - RECORDS_OUT_OPERATOR_RS_12: 25 - RECORDS_OUT_OPERATOR_RS_18: 1 - RECORDS_OUT_OPERATOR_RS_54: 30 - RECORDS_OUT_OPERATOR_RS_57: 5 - RECORDS_OUT_OPERATOR_RS_60: 25 - RECORDS_OUT_OPERATOR_SEL_15: 125 - RECORDS_OUT_OPERATOR_SEL_53: 30 - RECORDS_OUT_OPERATOR_SEL_56: 5 - RECORDS_OUT_OPERATOR_SEL_59: 25 - RECORDS_OUT_OPERATOR_SEL_62: 1 - RECORDS_OUT_OPERATOR_TS_0: 35 - RECORDS_OUT_OPERATOR_TS_3: 50 - RECORDS_OUT_OPERATOR_TS_6: 30 + RECORDS_OUT_OPERATOR_MERGEJOIN_51: 25 + RECORDS_OUT_OPERATOR_MERGEJOIN_52: 125 + RECORDS_OUT_OPERATOR_RS_14: 25 + RECORDS_OUT_OPERATOR_RS_19: 1 + RECORDS_OUT_OPERATOR_RS_55: 25 + RECORDS_OUT_OPERATOR_RS_58: 30 + RECORDS_OUT_OPERATOR_RS_61: 5 + RECORDS_OUT_OPERATOR_SEL_16: 125 + RECORDS_OUT_OPERATOR_SEL_54: 25 + RECORDS_OUT_OPERATOR_SEL_57: 30 + RECORDS_OUT_OPERATOR_SEL_60: 5 + RECORDS_OUT_OPERATOR_SEL_63: 1 + RECORDS_OUT_OPERATOR_TS_0: 30 + RECORDS_OUT_OPERATOR_TS_3: 35 + RECORDS_OUT_OPERATOR_TS_6: 50 TOTAL_TABLE_ROWS_WRITTEN: 0 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 565 @@ -388,16 +388,16 @@ Stage-1 LLAP IO COUNTERS: ROWS_EMITTED: 115 Stage-1 INPUT COUNTERS: GROUPED_INPUT_SPLITS_Map_1: 1 - GROUPED_INPUT_SPLITS_Map_5: 1 + GROUPED_INPUT_SPLITS_Map_4: 1 GROUPED_INPUT_SPLITS_Map_6: 1 INPUT_DIRECTORIES_Map_1: 1 - INPUT_DIRECTORIES_Map_5: 1 + INPUT_DIRECTORIES_Map_4: 1 INPUT_DIRECTORIES_Map_6: 1 INPUT_FILES_Map_1: 1 - INPUT_FILES_Map_5: 1 + INPUT_FILES_Map_4: 1 INPUT_FILES_Map_6: 1 RAW_INPUT_SPLITS_Map_1: 1 - RAW_INPUT_SPLITS_Map_5: 1 + RAW_INPUT_SPLITS_Map_4: 1 RAW_INPUT_SPLITS_Map_6: 1 NULL PREHOOK: query: explain diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out index 10e3ba040a..de2327df4b 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out @@ -1796,14 +1796,35 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Map 7 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: k + filterExpr: str is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: str is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: str (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 Map Operator Tree: TableScan alias: i @@ -1839,7 +1860,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: v @@ -1860,27 +1881,6 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 87000 Basic stats: PARTIAL Column stats: PARTIAL Execution mode: vectorized, llap LLAP IO: all inputs - Map 7 - Map Operator Tree: - TableScan - alias: k - filterExpr: str is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: str is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: str (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1889,24 +1889,7 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() @@ -1919,7 +1902,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1935,6 +1918,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out index fcae6cae9a..93ee38a8ad 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out @@ -258,48 +258,53 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: orders - filterExpr: (o_orderkey is not null and o_custkey is not null) (type: boolean) - Statistics: Num rows: 1500000000 Data size: 296399999792 Basic stats: COMPLETE Column stats: NONE + alias: l + filterExpr: l_orderkey is not null (type: boolean) + Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (o_orderkey is not null and o_custkey is not null) (type: boolean) - Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE + predicate: l_orderkey is not null (type: boolean) + Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: o_orderkey (type: bigint), o_custkey (type: bigint), o_totalprice (type: double), o_orderdate (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE + expressions: l_orderkey (type: bigint), l_quantity (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: bigint) + key expressions: _col0 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col2 (type: double), _col3 (type: string) + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 11 + Map 10 Map Operator Tree: TableScan - alias: l + alias: lineitem filterExpr: l_orderkey is not null (type: boolean) + properties: + insideView TRUE Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: l_orderkey is not null (type: boolean) Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: bigint), l_quantity (type: double) + Group By Operator + aggregations: sum(l_quantity) + keys: l_orderkey (type: bigint) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -311,56 +316,51 @@ STAGE PLANS: value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 8 + Map 6 Map Operator Tree: TableScan - alias: customer - filterExpr: c_custkey is not null (type: boolean) - Statistics: Num rows: 150000000 Data size: 27360000192 Basic stats: COMPLETE Column stats: NONE + alias: orders + filterExpr: (o_orderkey is not null and o_custkey is not null) (type: boolean) + Statistics: Num rows: 1500000000 Data size: 296399999792 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: c_custkey is not null (type: boolean) - Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE + predicate: (o_orderkey is not null and o_custkey is not null) (type: boolean) + Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_custkey (type: bigint), c_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE + expressions: o_orderkey (type: bigint), o_custkey (type: bigint), o_totalprice (type: double), o_orderdate (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col1 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1349999996 Data size: 266759999022 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col2 (type: double), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: lineitem - filterExpr: l_orderkey is not null (type: boolean) - properties: - insideView TRUE - Statistics: Num rows: 5999989709 Data size: 91199843728 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: c_custkey is not null (type: boolean) + Statistics: Num rows: 150000000 Data size: 27360000192 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: l_orderkey is not null (type: boolean) - Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(l_quantity) - keys: l_orderkey (type: bigint) - minReductionHashAggr: 0.99 - mode: hash + predicate: c_custkey is not null (type: boolean) + Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_custkey (type: bigint), c_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 5699990232 Data size: 86639851670 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Statistics: Num rows: 142500000 Data size: 25992000182 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 10 + Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -383,42 +383,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 949998372 Data size: 14439975278 Basic stats: COMPLETE Column stats: NONE Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1485000027 Data size: 293436005284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1485000027 Data size: 293436005284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: double), _col3 (type: string), _col4 (type: bigint), _col5 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1633500065 Data size: 322779612808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1633500065 Data size: 322779612808 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: double), _col3 (type: string), _col4 (type: bigint), _col5 (type: string) - Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -427,17 +391,17 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col8 + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7 Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE Top N Key Operator sort order: -++++ - keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string) + keys: _col4 (type: double), _col5 (type: string), _col2 (type: bigint), _col6 (type: bigint), _col7 (type: string) null sort order: zzzzz Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE top n: 100 Group By Operator - aggregations: sum(_col8) - keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string) + aggregations: sum(_col1) + keys: _col4 (type: double), _col5 (type: string), _col2 (type: bigint), _col6 (type: bigint), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -450,7 +414,7 @@ STAGE PLANS: Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: double) - Reducer 5 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -470,7 +434,7 @@ STAGE PLANS: Statistics: Num rows: 3134994695 Data size: 47651919443 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint), _col5 (type: double) - Reducer 6 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -504,7 +468,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) - Reducer 7 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -519,6 +483,42 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1485000027 Data size: 293436005284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1485000027 Data size: 293436005284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col3 (type: string), _col4 (type: bigint), _col5 (type: string) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1633500065 Data size: 322779612808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1633500065 Data size: 322779612808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col3 (type: string), _col4 (type: bigint), _col5 (type: string) Stage: Stage-2 Dependency Collection diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out index 6dc2d9f00b..412c06a845 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out @@ -1367,11 +1367,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (key < 6) (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: a @@ -1411,27 +1432,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 9 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Map 5 - Map Operator Tree: - TableScan - alias: a - filterExpr: (key < 6) (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out index 773ed8093f..9baf7999ea 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out @@ -401,7 +401,7 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 0 -Warning: Shuffle Join MERGEJOIN[45][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product PREHOOK: query: select count(*) from part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -413,8 +413,8 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out index 96ca2d95ac..7c03f6e222 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -320,8 +320,8 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -376,7 +376,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) AND p_size = ANY (select p_size from part group by p_size) PREHOOK: type: QUERY @@ -393,16 +393,17 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(p_partkey=[$0], p_size=[$5]) HiveFilter(condition=[IS NOT NULL($5)]) HiveTableScan(table=[[default, part]], table:alias=[part]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(p_size=[$0]) - HiveAggregate(group=[{5}]) - HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject(p_size=[$0], m=[$1], c=[$2], d=[$3]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], c=[$1], d=[$2]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) HiveTableScan(table=[[default, part]], table:alias=[part]) - HiveProject(m=[$0], c=[$1], d=[$2]) - HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) - HiveTableScan(table=[[default, part]], table:alias=[part]) -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 6' is a cross product PREHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part) AND p_size = ANY (select p_size from part group by p_size) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index ad5c9f0af6..215251df67 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -217,8 +217,8 @@ POSTHOOK: Input: default@part_null_n0 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -229,8 +229,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@tempty_n0 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -250,12 +250,32 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) - Reducer 4 <- Map 6 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 6 (XPROD_EDGE), Reducer 4 (XPROD_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: tempty_n0 @@ -275,7 +295,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: tempty_n0 @@ -295,27 +315,30 @@ STAGE PLANS: value expressions: _col0 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: part - filterExpr: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 + residual filter predicates: {(_col9 > _col11)} + Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -332,7 +355,7 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 3 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -348,29 +371,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 95 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: double) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - residual filter predicates: {(_col11 > _col1)} - Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 5784 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -4322,13 +4322,38 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: lineitem + filterExpr: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(l_quantity), count(l_quantity) + keys: l_partkey (type: int) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 Map Operator Tree: TableScan alias: lineitem @@ -4350,7 +4375,7 @@ STAGE PLANS: value expressions: _col1 (type: double), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: part @@ -4371,49 +4396,29 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: lineitem - filterExpr: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(l_quantity), count(l_quantity) - keys: l_partkey (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: double) + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 is not null and _col2 is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -4421,17 +4426,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5 - residual filter predicates: {(_col1 > _col5)} + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col3, _col4 + residual filter predicates: {(_col3 > _col1)} Statistics: Num rows: 8 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: double) - outputColumnNames: _col2 + expressions: _col4 (type: double) + outputColumnNames: _col4 Statistics: Num rows: 8 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col2) + aggregations: sum(_col4) minReductionHashAggr: 0.875 mode: hash outputColumnNames: _col0 @@ -4456,29 +4461,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Execution mode: vectorized, llap + Reducer 6 + Execution mode: llap Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 is not null and _col2 is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), (_col1 / _col2) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double) + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: double) Stage: Stage-0 Fetch Operator @@ -5883,8 +5883,8 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5902,14 +5902,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) - Reducer 4 <- Map 8 (XPROD_EDGE), Reducer 3 (XPROD_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: part @@ -5934,7 +5954,7 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: part @@ -5957,27 +5977,30 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: part - filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + residual filter predicates: {(_col5 > _col9)} + Statistics: Num rows: 112 Data size: 69776 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6002,7 +6025,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) - Reducer 3 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -6018,30 +6041,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - residual filter predicates: {(_col7 > _col0)} - Statistics: Num rows: 112 Data size: 69776 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 112 Data size: 69328 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -6062,7 +6062,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 311cee743d..65cb2d0a9b 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -641,37 +641,14 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) - Reducer 8 <- Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: p - filterExpr: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 6 Map Operator Tree: TableScan alias: p @@ -723,28 +700,75 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs + Map 7 + Map Operator Tree: + TableScan + alias: p + filterExpr: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator + aggregations: max(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: string) + 1 _col1 (type: int), _col0 (type: string) + outputColumnNames: _col1, _col4, _col5, _col7, _col8, _col9 + Statistics: Num rows: 13 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col4 (type: int), ((_col8 or _col7 is null) is true or ((_col5 or _col9) is true and null and (_col8 or _col7 is null) is not true and _col1 is null) or ((_col8 or _col7 is null) is not true and _col1 is null and (_col5 or _col9) is not true)) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -769,7 +793,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: boolean) - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -787,52 +811,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col0 (type: string) Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean), _col4 (type: bigint), _col5 (type: boolean), _col6 (type: boolean) - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: int), _col0 (type: string) - 1 _col0 (type: int), _col2 (type: string) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col8 - Statistics: Num rows: 13 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), ((_col5 or _col4 is null) is true or ((_col2 or _col6) is true and null and (_col5 or _col4 is null) is not true and _col8 is null) or ((_col5 or _col4 is null) is not true and _col8 is null and (_col2 or _col6) is not true)) (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean) - Reducer 8 + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -858,6 +837,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: boolean), _col3 (type: boolean) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 0dc9821846..dada7e45c3 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@l3_monthly_dw_dimplan POSTHOOK: Output: default@l3_monthly_dw_dimplan #### A masked pattern was here #### -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -488,11 +488,92 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: s1 + filterExpr: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) + Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) + Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col2 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: l3_monthly_dw_dimplan + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} + bucket_count 64 + bucket_field_name idp_data_date + bucketing_version 2 + column.name.delimiter , + columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by + columns.comments + columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) +#### A masked pattern was here #### + name default.l3_monthly_dw_dimplan + numFiles 1 + numRows 180340 + rawDataSize 269826156 + serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 5242699 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} + bucket_count 64 + bucket_field_name idp_data_date + bucketing_version 2 + column.name.delimiter , + columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by + columns.comments + columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) +#### A masked pattern was here #### + name default.l3_monthly_dw_dimplan + numFiles 1 + numRows 180340 + rawDataSize 269826156 + serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 5242699 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.l3_monthly_dw_dimplan + name: default.l3_monthly_dw_dimplan + Truncated Path -> Alias: + /l3_monthly_dw_dimplan [s1] + Map 2 Map Operator Tree: TableScan alias: dw @@ -510,29 +591,29 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 + Estimated key counts: Map 4 => 1 keys: 0 1 outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 - Estimated key counts: Map 4 => 90170 + Right Outer Join 0 to 1 + Estimated key counts: Map 1 => 90170 keys: - 0 _col1 (type: bigint), _col0 (type: bigint) - 1 _col0 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col1, _col3 + 0 _col0 (type: bigint), _col2 (type: bigint) + 1 _col1 (type: bigint), _col0 (type: bigint) + outputColumnNames: _col1, _col4 input vertices: - 1 Map 4 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 30 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: bigint), _col3 (type: bigint) + expressions: _col4 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col3 Statistics: Num rows: 30 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator @@ -621,7 +702,7 @@ STAGE PLANS: name: default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw] - Map 3 + Map 4 Map Operator Tree: TableScan alias: snap @@ -693,87 +774,6 @@ STAGE PLANS: name: default.l3_clarity__l3_snap_number_2018022300104 Truncated Path -> Alias: /l3_clarity__l3_snap_number_2018022300104 [snap] - Map 4 - Map Operator Tree: - TableScan - alias: s1 - filterExpr: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col2 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: all inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: l3_monthly_dw_dimplan - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} - bucket_count 64 - bucket_field_name idp_data_date - bucketing_version 2 - column.name.delimiter , - columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by - columns.comments - columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) -#### A masked pattern was here #### - name default.l3_monthly_dw_dimplan - numFiles 1 - numRows 180340 - rawDataSize 269826156 - serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242699 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} - bucket_count 64 - bucket_field_name idp_data_date - bucketing_version 2 - column.name.delimiter , - columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by - columns.comments - columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) -#### A masked pattern was here #### - name default.l3_monthly_dw_dimplan - numFiles 1 - numRows 180340 - rawDataSize 269826156 - serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242699 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.l3_monthly_dw_dimplan - name: default.l3_monthly_dw_dimplan - Truncated Path -> Alias: - /l3_monthly_dw_dimplan [s1] Map 5 Map Operator Tree: TableScan @@ -853,7 +853,7 @@ STAGE PLANS: name: default.l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2] - Reducer 2 + Reducer 3 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: @@ -897,7 +897,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 @@ -939,7 +939,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan 7147200 195775 27114 7147200 234349 27114 7147200 350519 27114 -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -1003,11 +1003,92 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: s1 + filterExpr: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) + Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) + Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col2 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: true + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: l3_monthly_dw_dimplan + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} + bucket_count 64 + bucket_field_name idp_data_date + bucketing_version 2 + column.name.delimiter , + columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by + columns.comments + columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) +#### A masked pattern was here #### + name default.l3_monthly_dw_dimplan + numFiles 1 + numRows 180340 + rawDataSize 269826156 + serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 5242699 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} + bucket_count 64 + bucket_field_name idp_data_date + bucketing_version 2 + column.name.delimiter , + columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by + columns.comments + columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) +#### A masked pattern was here #### + name default.l3_monthly_dw_dimplan + numFiles 1 + numRows 180340 + rawDataSize 269826156 + serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 5242699 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.l3_monthly_dw_dimplan + name: default.l3_monthly_dw_dimplan + Truncated Path -> Alias: + /l3_monthly_dw_dimplan [s1] + Map 2 Map Operator Tree: TableScan alias: dw @@ -1025,29 +1106,29 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 + Estimated key counts: Map 4 => 1 keys: 0 1 outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 15 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Left Outer Join 0 to 1 - Estimated key counts: Map 4 => 90170 + Right Outer Join 0 to 1 + Estimated key counts: Map 1 => 90170 keys: - 0 _col1 (type: bigint), _col0 (type: bigint) - 1 _col0 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col1, _col3 + 0 _col0 (type: bigint), _col2 (type: bigint) + 1 _col1 (type: bigint), _col0 (type: bigint) + outputColumnNames: _col1, _col4 input vertices: - 1 Map 4 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 30 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: bigint), _col3 (type: bigint) + expressions: _col4 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col3 Statistics: Num rows: 30 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator @@ -1136,7 +1217,7 @@ STAGE PLANS: name: default.l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_dw_stg_2018022300104_1 [dw] - Map 3 + Map 4 Map Operator Tree: TableScan alias: snap @@ -1208,87 +1289,6 @@ STAGE PLANS: name: default.l3_clarity__l3_snap_number_2018022300104 Truncated Path -> Alias: /l3_clarity__l3_snap_number_2018022300104 [snap] - Map 4 - Map Operator Tree: - TableScan - alias: s1 - filterExpr: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col2 (type: bigint) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col1 (type: bigint) - auto parallelism: true - Execution mode: vectorized, llap - LLAP IO: all inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: l3_monthly_dw_dimplan - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} - bucket_count 64 - bucket_field_name idp_data_date - bucketing_version 2 - column.name.delimiter , - columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by - columns.comments - columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) -#### A masked pattern was here #### - name default.l3_monthly_dw_dimplan - numFiles 1 - numRows 180340 - rawDataSize 269826156 - serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242699 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"bmo_cost_type":"true","bmo_fiscal_year":"true","charge_code_key":"true","charge_code_object_id":"true","clarity_updated_date":"true","finplan_detail_object_id":"true","idp_audit_id":"true","idp_data_date":"true","idp_warehouse_id":"true","is_latest_snapshot":"true","l3_created_date":"true","l3_snapshot_number":"true","last_updated_by":"true","latest_fiscal_budget_plan":"true","percentage":"true","period_end":"true","period_start":"true","period_type":"true","plan_category":"true","plan_code":"true","plan_description":"true","plan_key":"true","plan_name":"true","plan_of_record":"true","plan_status":"true","plan_type":"true","project_key":"true","project_object_id":"true","resoruce_object_id":"true","resource_key":"true","transclass_key":"true","txn_class_object_id":"true"}} - bucket_count 64 - bucket_field_name idp_data_date - bucketing_version 2 - column.name.delimiter , - columns idp_warehouse_id,idp_audit_id,idp_data_date,l3_snapshot_number,plan_key,project_key,charge_code_key,transclass_key,resource_key,finplan_detail_object_id,project_object_id,txn_class_object_id,charge_code_object_id,resoruce_object_id,plan_name,plan_code,plan_type,period_type,plan_description,plan_status,period_start,period_end,plan_of_record,percentage,l3_created_date,bmo_cost_type,bmo_fiscal_year,clarity_updated_date,is_latest_snapshot,latest_fiscal_budget_plan,plan_category,last_updated_by - columns.comments - columns.types bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250) -#### A masked pattern was here #### - name default.l3_monthly_dw_dimplan - numFiles 1 - numRows 180340 - rawDataSize 269826156 - serialization.ddl struct l3_monthly_dw_dimplan { i64 idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, varchar(50) period_type, varchar(3000) plan_description, varchar(50) plan_status, varchar(50) period_start, varchar(50) period_end, varchar(1) plan_of_record, decimal(32,6) percentage, timestamp l3_created_date, varchar(30) bmo_cost_type, varchar(50) bmo_fiscal_year, timestamp clarity_updated_date, i64 is_latest_snapshot, i64 latest_fiscal_budget_plan, varchar(70) plan_category, varchar(250) last_updated_by} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde - totalSize 5242699 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.l3_monthly_dw_dimplan - name: default.l3_monthly_dw_dimplan - Truncated Path -> Alias: - /l3_monthly_dw_dimplan [s1] Map 5 Map Operator Tree: TableScan @@ -1368,7 +1368,7 @@ STAGE PLANS: name: default.l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 Truncated Path -> Alias: /l3_clarity__l3_monthly_dw_factplan_datajoin_1_s2_2018022300104_1 [s2] - Reducer 2 + Reducer 3 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: @@ -1412,7 +1412,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[50][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out index d1991cc9e7..cf222a10b3 100644 --- a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out @@ -651,12 +651,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: a @@ -678,7 +699,7 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -699,45 +720,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -758,7 +741,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -773,6 +756,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -805,12 +805,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 Map Operator Tree: TableScan alias: a @@ -831,7 +852,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -841,7 +862,7 @@ STAGE PLANS: Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -862,27 +883,6 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1596,12 +1596,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (CUSTOM_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: a @@ -1622,7 +1657,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1632,7 +1667,7 @@ STAGE PLANS: Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -1653,42 +1688,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1735,12 +1735,47 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (CUSTOM_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Map 3 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 Map Operator Tree: TableScan alias: a @@ -1761,7 +1796,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1771,7 +1806,7 @@ STAGE PLANS: Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 4 Map Operator Tree: TableScan alias: c @@ -1792,42 +1827,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 5ddfeb7155..6ce07b9bc9 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -670,9 +670,9 @@ b str two line1 four line2 six line3 -Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 4' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 2' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 2' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 @@ -703,11 +703,82 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) - Map 4 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 2 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Map Join Vectorization: + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinInnerMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 3:string, 4:map, 5:array, 6:struct + smallTableValueMapping: 3:string, 4:map, 5:array, 6:struct + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 2 + Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: map), _col3 (type: array), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 4, 5, 6, 0] + Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex_n0 + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, map, array, struct] + Map 2 Map Operator Tree: TableScan alias: orc_create_staging_n0 @@ -737,7 +808,7 @@ STAGE PLANS: nativeConditionsNotMet: Supports Value Types [MAP, LIST, STRUCT] IS false outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 1 Data size: 3445 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -753,7 +824,7 @@ STAGE PLANS: nativeConditionsNotMet: Supports Value Types [MAP, LIST, STRUCT] IS false outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 1 Data size: 3450 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: @@ -781,7 +852,7 @@ STAGE PLANS: dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 2 + Map 3 Map Operator Tree: TableScan alias: spam2 @@ -819,7 +890,7 @@ STAGE PLANS: dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: spam1 @@ -857,77 +928,6 @@ STAGE PLANS: dataColumns: str:string, mp:map, lst:array, strct:struct partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Map Join Vectorization: - bigTableRetainColumnNums: [0] - bigTableValueColumns: 0:string - className: VectorMapJoinInnerMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - projectedOutput: 3:string, 4:map, 5:array, 6:struct, 0:string - smallTableValueMapping: 3:string, 4:map, 5:array, 6:struct - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col1, _col2, _col3, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [3, 4, 5, 6, 0] - Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 500 Data size: 1769000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.orc_create_complex_n0 - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string, map, array, struct] Stage: Stage-2 Dependency Collection @@ -946,9 +946,9 @@ STAGE PLANS: Stats Work Basic Stats Work: -Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 4' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 2' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 2' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex_n0 SELECT orc_create_staging_n0.*, src1.key FROM orc_create_staging_n0 cross join src src1 cross join orc_create_staging_n0 spam1 cross join orc_create_staging_n0 spam2 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index ea6aa83ec0..f8107863e1 100644 --- a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -29,18 +29,18 @@ INNER JOIN (SELECT `key` FROM `default`.`src1` WHERE `key` IS NOT NULL) AS `t4` ON `t2`.`key` = `t4`.`key`) ON `t0`.`value` = `t2`.`value` STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-6 is a root stage + Stage-4 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src Fetch Operator limit: -1 - $hdt$_2:src1 + $hdt$_1:$hdt$_2:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -60,10 +60,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - $hdt$_2:src1 + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 + $hdt$_1:$hdt$_2:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -83,7 +83,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -112,33 +112,37 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 + Position of Big Table: 1 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Execution mode: vectorized Local Work: Map Reduce Local Work @@ -438,10 +442,10 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:$hdt$_1:srcpart] Stage: Stage-0 Fetch Operator @@ -472,18 +476,18 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-6 is a root stage + Stage-4 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src Fetch Operator limit: -1 - $hdt$_2:src1 + $hdt$_1:$hdt$_2:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -501,9 +505,9 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:src1 + 0 _col0 (type: string) + 1 _col1 (type: string) + $hdt$_1:$hdt$_2:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -520,7 +524,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -546,17 +550,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out index 07237da7de..e0d9197c54 100644 --- a/ql/src/test/results/clientpositive/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_subquery.q.out @@ -23,18 +23,18 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-6 is a root stage + Stage-4 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z Fetch Operator limit: -1 - $hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -52,9 +52,9 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:x + 0 _col0 (type: string) + 1 _col1 (type: string) + $hdt$_1:$hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -71,7 +71,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -97,12 +97,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1, _col3 Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -276,18 +276,18 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-6 is a root stage + Stage-4 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z Fetch Operator limit: -1 - $hdt$_2:x + $hdt$_1:$hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: @@ -305,9 +305,9 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - $hdt$_2:x + 0 _col0 (type: string) + 1 _col1 (type: string) + $hdt$_1:$hdt$_2:x TableScan alias: x filterExpr: key is not null (type: boolean) @@ -324,7 +324,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -350,12 +350,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1, _col3 Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 61 Data size: 10797 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index fa5ef9ce12..9387640337 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -1891,13 +1891,13 @@ PLAN VECTORIZATION: enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1958,16 +1958,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: b filterExpr: key is not null (type: boolean) @@ -1985,6 +1978,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Map Vectorization: enabled: false enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false @@ -2013,7 +2013,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -2662,13 +2662,13 @@ PLAN VECTORIZATION: enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2729,16 +2729,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: b filterExpr: key is not null (type: boolean) @@ -2756,6 +2749,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE Map Vectorization: enabled: false enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false @@ -2784,7 +2784,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/perf/spark/query1.q.out b/ql/src/test/results/clientpositive/perf/spark/query1.q.out index 9c51153bba..4e6c42ad79 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query1.q.out @@ -66,7 +66,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: store @@ -92,13 +92,34 @@ STAGE PLANS: Edges: Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 36), Map 13 (PARTITION-LEVEL SORT, 36) Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 39) - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 36), Map 8 (PARTITION-LEVEL SORT, 36) - Reducer 4 <- Reducer 3 (GROUP, 39) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 559), Reducer 4 (PARTITION-LEVEL SORT, 559) - Reducer 6 <- Reducer 12 (PARTITION-LEVEL SORT, 601), Reducer 5 (PARTITION-LEVEL SORT, 601) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 559), Reducer 8 (PARTITION-LEVEL SORT, 559) + Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 601), Reducer 2 (PARTITION-LEVEL SORT, 601) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 6 (PARTITION-LEVEL SORT, 36) + Reducer 8 <- Reducer 7 (GROUP, 39) #### A masked pattern was here #### Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_customer_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized Map 10 Map Operator Tree: TableScan @@ -140,7 +161,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 2 + Map 6 Map Operator Tree: TableScan alias: store_returns @@ -161,47 +182,6 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Reducer 11 Reduce Operator Tree: Join Operator @@ -259,7 +239,64 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(24,7)) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: decimal(17,2)) Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col5, _col6 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col5 > _col6) (type: boolean) + Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + Reducer 4 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -283,7 +320,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 8 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -309,7 +346,7 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 0 Map 1 + 0 Map 5 Statistics: Num rows: 34842647 Data size: 2699627989 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -318,63 +355,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 34842647 Data size: 2699627989 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(17,2)) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(17,2)), _col5 (type: string) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col3, _col5, _col6 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 > _col6) (type: boolean) - Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Reducer 7 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out index 8ec8b0c83a..67787d33d0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out @@ -164,23 +164,45 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) - Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) - Reducer 18 <- Reducer 17 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) - Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975) - Reducer 24 <- Reducer 23 (GROUP, 481) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) - Reducer 4 <- Reducer 3 (GROUP, 186) - Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 444), Reducer 18 (PARTITION-LEVEL SORT, 444), Reducer 24 (PARTITION-LEVEL SORT, 444), Reducer 4 (PARTITION-LEVEL SORT, 444) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 706), Reducer 14 (PARTITION-LEVEL SORT, 706) + Reducer 11 <- Reducer 10 (GROUP, 186) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 204), Reducer 18 (PARTITION-LEVEL SORT, 204), Reducer 24 (PARTITION-LEVEL SORT, 204) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 975), Reducer 20 (PARTITION-LEVEL SORT, 975) + Reducer 18 <- Reducer 17 (GROUP, 481) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Reducer 7 (PARTITION-LEVEL SORT, 975) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 398), Map 21 (PARTITION-LEVEL SORT, 398) + Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 706), Reducer 26 (PARTITION-LEVEL SORT, 706) + Reducer 24 <- Reducer 23 (GROUP, 186) + Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 154), Map 27 (PARTITION-LEVEL SORT, 154) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 417), Reducer 3 (PARTITION-LEVEL SORT, 417) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: web_sales @@ -201,14 +223,14 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(8,2)) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -221,7 +243,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 14 + Map 16 Map Operator Tree: TableScan alias: customer @@ -242,28 +264,28 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 15 + Map 19 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), (ws_ext_list_price - ws_ext_discount_amt) (type: decimal(8,2)) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), (ss_ext_list_price - ss_ext_discount_amt) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(8,2)) Execution mode: vectorized - Map 19 + Map 21 Map Operator Tree: TableScan alias: date_dim @@ -283,7 +305,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 20 + Map 22 Map Operator Tree: TableScan alias: customer @@ -304,35 +326,35 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 21 + Map 25 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), (ss_ext_list_price - ss_ext_discount_amt) (type: decimal(8,2)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), (ws_ext_list_price - ws_ext_discount_amt) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(8,2)) Execution mode: vectorized - Map 25 + Map 27 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -345,28 +367,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 26 + Map 6 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), (ss_ext_list_price - ss_ext_discount_amt) (type: decimal(8,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(8,2)) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -386,7 +408,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer @@ -407,28 +429,70 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), (ss_ext_list_price - ss_ext_discount_amt) (type: decimal(8,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(8,2)) - Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + null sort order: zzzzzzz + sort order: +++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col7 (type: decimal(18,2)) + Reducer 11 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(18,2)) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col5, _col6 + Statistics: Num rows: 255550078 Data size: 22544702094 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 255550078 Data size: 22544702094 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(18,2)), _col3 (type: decimal(18,2)), _col5 (type: decimal(18,2)), _col6 (type: boolean) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -437,27 +501,27 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(8,2)) - Reducer 11 + Reducer 17 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -469,7 +533,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(18,2)) - Reducer 12 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -496,7 +560,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(18,2)) - Reducer 16 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + null sort order: zzzzzzz + sort order: +++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col7 (type: decimal(18,2)) + Reducer 20 Reduce Operator Tree: Join Operator condition map: @@ -505,27 +593,27 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(8,2)) - Reducer 17 + Reducer 23 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -537,7 +625,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(18,2)) - Reducer 18 + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -564,7 +652,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(18,2)), _col2 (type: boolean) - Reducer 2 + Reducer 26 Reduce Operator Tree: Join Operator condition map: @@ -581,48 +669,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(8,2)) - Reducer 22 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(8,2)) - Reducer 23 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - null sort order: zzzzzzz - sort order: +++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: decimal(18,2)) - Reducer 24 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -642,84 +689,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: decimal(18,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - null sort order: zzzzzzz - sort order: +++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: decimal(18,2)) Reducer 4 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(18,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(18,2)) - Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 1 to 3 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 - Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: string) + outputColumnNames: _col1, _col2, _col4, _col6, _col8, _col9 + Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE (false) END) ELSE (false) END (type: boolean) - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col6 is not null) THEN (CASE WHEN (_col9) THEN (((_col4 / _col8) > (_col2 / _col6))) ELSE (false) END) ELSE (false) END (type: boolean) + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: string) + expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -730,6 +729,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(8,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out index e78c9eefe1..692e5d6163 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -123,7 +123,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 1 Map Operator Tree: TableScan alias: store @@ -138,8 +138,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col8 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -148,7 +148,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 2 Map Operator Tree: TableScan alias: household_demographics @@ -163,8 +163,8 @@ STAGE PLANS: Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -172,55 +172,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 258), Reducer 3 (PARTITION-LEVEL SORT, 258) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 438), Reducer 8 (PARTITION-LEVEL SORT, 438) + Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 258), Reducer 4 (PARTITION-LEVEL SORT, 258) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 403), Map 9 (PARTITION-LEVEL SORT, 403) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250))) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250))) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit BETWEEN 100 AND 200 (type: boolean), ss_net_profit BETWEEN 150 AND 300 (type: boolean), ss_net_profit BETWEEN 50 AND 250 (type: boolean), ss_sales_price BETWEEN 100 AND 150 (type: boolean), ss_sales_price BETWEEN 50 AND 100 (type: boolean), ss_sales_price BETWEEN 150 AND 200 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) - Execution mode: vectorized - Map 6 + Map 10 Map Operator Tree: TableScan - alias: customer_demographics - filterExpr: (cd_demo_sk is not null and (cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and (ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cd_demo_sk is not null and (cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and (ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int), (cd_marital_status = 'M') (type: boolean), (cd_education_status = '4 yr Degree') (type: boolean), (cd_marital_status = 'D') (type: boolean), (cd_education_status = 'Primary') (type: boolean), (cd_marital_status = 'U') (type: boolean), (cd_education_status = 'Advanced Degree') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), (ca_state) IN ('KY', 'GA', 'NM') (type: boolean), (ca_state) IN ('MT', 'OR', 'IN') (type: boolean), (ca_state) IN ('WI', 'MO', 'WV') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized - Map 7 + Map 3 Map Operator Tree: TableScan alias: date_dim @@ -240,45 +219,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250))) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250))) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit BETWEEN 100 AND 200 (type: boolean), ss_net_profit BETWEEN 150 AND 300 (type: boolean), ss_net_profit BETWEEN 50 AND 250 (type: boolean), ss_sales_price BETWEEN 100 AND 150 (type: boolean), ss_sales_price BETWEEN 50 AND 100 (type: boolean), ss_sales_price BETWEEN 150 AND 200 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and (ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: customer_demographics + filterExpr: (cd_demo_sk is not null and (cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ca_address_sk is not null and (ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: (cd_demo_sk is not null and (cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), (ca_state) IN ('KY', 'GA', 'NM') (type: boolean), (ca_state) IN ('MT', 'OR', 'IN') (type: boolean), (ca_state) IN ('WI', 'MO', 'WV') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + expressions: cd_demo_sk (type: int), (cd_marital_status = 'M') (type: boolean), (cd_education_status = '4 yr Degree') (type: boolean), (cd_marital_status = 'D') (type: boolean), (cd_education_status = 'Primary') (type: boolean), (cd_marital_status = 'U') (type: boolean), (cd_education_status = 'Advanced Degree') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean) Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean) - Reducer 3 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -288,29 +271,29 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20 + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20, _col23, _col24 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col2, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col19, _col20, _col21, _col22, _col23, _col24 input vertices: - 1 Map 8 + 0 Map 2 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col15 and _col16 and _col11 and _col23) or (_col17 and _col18 and _col12 and _col24) or (_col19 and _col20 and _col13 and _col24)) (type: boolean) + predicate: ((_col19 and _col20 and _col15 and _col1) or (_col21 and _col22 and _col16 and _col2) or (_col23 and _col24 and _col17 and _col2)) (type: boolean) Statistics: Num rows: 143746917 Data size: 12681394753 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col7 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: int) + Map-reduce partition columns: _col7 (type: int) Statistics: Num rows: 143746917 Data size: 12681394753 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) - Reducer 4 + value expressions: _col8 (type: int), _col9 (type: int), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean) + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -318,35 +301,39 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col7 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col26, _col27, _col28 + outputColumnNames: _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col26, _col27, _col28 Statistics: Num rows: 158121612 Data size: 13949534530 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col26 and _col8) or (_col27 and _col9) or (_col28 and _col10)) (type: boolean) + predicate: ((_col26 and _col12) or (_col27 and _col13) or (_col28 and _col14)) (type: boolean) Statistics: Num rows: 118591209 Data size: 10462150897 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col7 - input vertices: - 1 Map 10 - Statistics: Num rows: 130450332 Data size: 11508366236 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5), count(_col5), sum(_col6), count(_col6), sum(_col7), count(_col7) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: _col8 (type: int), _col9 (type: int), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + outputColumnNames: _col8, _col9, _col10, _col11 + Statistics: Num rows: 118591209 Data size: 10462150897 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col8 (type: int) + outputColumnNames: _col10, _col11, _col12 + input vertices: + 0 Map 1 + Statistics: Num rows: 130450332 Data size: 11508366236 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col10), count(_col10), sum(_col11), count(_col11), sum(_col12), count(_col12) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) - Reducer 5 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -365,6 +352,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query15.q.out b/ql/src/test/results/clientpositive/perf/spark/query15.q.out index 33deb4bbfe..5ac43a29eb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query15.q.out @@ -54,55 +54,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 7 (PARTITION-LEVEL SORT, 305) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 873), Reducer 2 (PARTITION-LEVEL SORT, 873) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 686), Reducer 3 (PARTITION-LEVEL SORT, 686) - Reducer 5 <- Reducer 4 (GROUP, 406) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 686), Reducer 6 (PARTITION-LEVEL SORT, 686) + Reducer 3 <- Reducer 2 (GROUP, 406) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 873), Reducer 8 (PARTITION-LEVEL SORT, 873) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 305), Map 9 (PARTITION-LEVEL SORT, 305) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_sales_price (type: decimal(7,2)), (cs_sales_price > 500) (type: boolean) + expressions: ca_address_sk (type: int), ca_zip (type: string), (substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') (type: boolean), (ca_state) IN ('CA', 'WA', 'GA') (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: boolean) - Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized - Map 8 + Map 5 Map Operator Tree: TableScan alias: customer @@ -123,26 +103,46 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 9 + Map 7 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_zip (type: string), (substr(ca_zip, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') (type: boolean), (ca_state) IN ('CA', 'WA', 'GA') (type: boolean) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_sales_price (type: decimal(7,2)), (cs_sales_price > 500) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean), _col3 (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: boolean) + Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -151,53 +151,19 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col3 (type: boolean) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col6 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col3 (type: boolean) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col6 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col8, _col9, _col10 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col8, _col9 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col3 or _col9 or _col10) (type: boolean) + predicate: (_col9 or _col2 or _col3) (type: boolean) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: decimal(7,2)), _col8 (type: string) - outputColumnNames: _col2, _col8 + expressions: _col1 (type: string), _col8 (type: decimal(7,2)) + outputColumnNames: _col1, _col8 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col8 (type: string) + aggregations: sum(_col8) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -210,7 +176,7 @@ STAGE PLANS: Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -226,7 +192,7 @@ STAGE PLANS: Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -243,6 +209,40 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col5 (type: boolean) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: boolean) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out index d58f4e85b4..4d1bda22ae 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out @@ -80,42 +80,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00') (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: call_center + filterExpr: (cc_call_center_sk is not null and (cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County')) (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00') (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (cc_call_center_sk is not null and (cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County')) (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: cc_call_center_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 9 Map Operator Tree: TableScan - alias: call_center - filterExpr: (cc_call_center_sk is not null and (cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County')) (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00') (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cc_call_center_sk is not null and (cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County')) (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00') (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cc_call_center_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -125,34 +125,13 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (GROUP, 24) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 464), Map 7 (PARTITION-LEVEL SORT, 464) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 711), Reducer 2 (PARTITION-LEVEL SORT, 711) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 459), Reducer 3 (PARTITION-LEVEL SORT, 459) - Reducer 5 <- Reducer 4 (GROUP, 246) - Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 464), Map 8 (PARTITION-LEVEL SORT, 464) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 711), Reducer 3 (PARTITION-LEVEL SORT, 711) + Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 459), Reducer 4 (PARTITION-LEVEL SORT, 459) + Reducer 6 <- Reducer 5 (GROUP, 246) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: cs1 - filterExpr: (cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null and cs_order_number is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null and cs_order_number is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Execution mode: vectorized Map 10 Map Operator Tree: TableScan @@ -202,7 +181,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 2 + Map Operator Tree: + TableScan + alias: cs1 + filterExpr: (cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null and cs_order_number is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_ship_date_sk is not null and cs_ship_addr_sk is not null and cs_call_center_sk is not null and cs_order_number is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -241,7 +241,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: boolean) - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -261,26 +261,30 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col5, _col6, _col7, _col8 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col4 (type: int) + Select Operator + expressions: _col5 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + outputColumnNames: _col3, _col4, _col5, _col6 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Reducer 3 + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -302,7 +306,7 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Reducer 4 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -333,7 +337,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -352,7 +356,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out index 0bd37e1e12..2be0a4a1c0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -110,7 +110,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 1 Map Operator Tree: TableScan alias: store @@ -125,8 +125,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col6 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -134,38 +134,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 13 (PARTITION-LEVEL SORT, 306) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 374), Reducer 15 (PARTITION-LEVEL SORT, 374) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 36), Map 16 (PARTITION-LEVEL SORT, 36) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 442), Reducer 8 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: d3 @@ -185,7 +185,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: store_returns @@ -206,7 +206,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: d2 @@ -226,27 +226,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: (d_date_sk is not null and (d_quarter_name = '2000Q1')) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and (d_quarter_name = '2000Q1')) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 + Map 2 Map Operator Tree: TableScan alias: item @@ -267,28 +247,48 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: d1 + filterExpr: (d_date_sk is not null and (d_quarter_name = '2000Q1')) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and (d_quarter_name = '2000Q1')) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -305,7 +305,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int), _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) - Reducer 11 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -322,7 +322,7 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: int), _col7 (type: int), _col8 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col9 (type: int) - Reducer 14 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -339,40 +339,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col8, _col9 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + key expressions: _col4 (type: int), _col5 (type: int), _col7 (type: int) null sort order: zzz sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Map-reduce partition columns: _col4 (type: int), _col5 (type: int), _col7 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col8 (type: string), _col9 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col6 (type: int), _col8 (type: int) Reducer 4 Local Work: Map Reduce Local Work @@ -381,22 +364,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 0 _col4 (type: int), _col5 (type: int), _col7 (type: int) 1 _col6 (type: int), _col7 (type: int), _col8 (type: int) - outputColumnNames: _col3, _col5, _col8, _col9, _col13, _col19 + outputColumnNames: _col1, _col2, _col6, _col8, _col13, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col8, _col9, _col13, _col19, _col22 + 0 _col0 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col1, _col3, _col4, _col10, _col15, _col21 input vertices: - 1 Map 16 + 0 Map 1 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: string), _col9 (type: string), _col22 (type: string), _col5 (type: int), _col19 (type: int), _col13 (type: int), UDFToDouble(_col5) (type: double), (UDFToDouble(_col5) * UDFToDouble(_col5)) (type: double), UDFToDouble(_col19) (type: double), (UDFToDouble(_col19) * UDFToDouble(_col19)) (type: double), UDFToDouble(_col13) (type: double), (UDFToDouble(_col13) * UDFToDouble(_col13)) (type: double) + expressions: _col3 (type: string), _col4 (type: string), _col1 (type: string), _col10 (type: int), _col21 (type: int), _col15 (type: int), UDFToDouble(_col10) (type: double), (UDFToDouble(_col10) * UDFToDouble(_col10)) (type: double), UDFToDouble(_col21) (type: double), (UDFToDouble(_col21) * UDFToDouble(_col21)) (type: double), UDFToDouble(_col15) (type: double), (UDFToDouble(_col15) * UDFToDouble(_col15)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -451,6 +434,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index 9b93232ca2..2b3ecd930c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -86,80 +86,59 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 307), Map 13 (PARTITION-LEVEL SORT, 307) - Reducer 12 <- Map 14 (PARTITION-LEVEL SORT, 336), Reducer 11 (PARTITION-LEVEL SORT, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Map 15 (PARTITION-LEVEL SORT, 411), Reducer 4 (PARTITION-LEVEL SORT, 411) - Reducer 6 <- Reducer 5 (GROUP, 1009) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 336), Reducer 14 (PARTITION-LEVEL SORT, 336) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 307), Map 15 (PARTITION-LEVEL SORT, 307) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 411), Reducer 7 (PARTITION-LEVEL SORT, 411) + Reducer 3 <- Reducer 2 (GROUP, 1009) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 597), Reducer 9 (PARTITION-LEVEL SORT, 597) + Reducer 7 <- Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 6 (PARTITION-LEVEL SORT, 1009) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and (ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN')) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ca_address_sk is not null and (ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN')) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 10 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS decimal(12,2)) (type: decimal(12,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)) - Execution mode: vectorized - Map 13 - Map Operator Tree: - TableScan - alias: cd1 - filterExpr: (cd_demo_sk is not null and (cd_gender = 'M') and (cd_education_status = 'College')) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cd_demo_sk is not null and (cd_gender = 'M') and (cd_education_status = 'College')) (type: boolean) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cd_demo_sk (type: int), CAST( cd_dep_count AS decimal(12,2)) (type: decimal(12,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(12,2)) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized - Map 14 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -179,49 +158,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Map 13 Map Operator Tree: TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)) Execution mode: vectorized - Map 8 + Map 15 Map Operator Tree: TableScan - alias: customer - filterExpr: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: cd1 + filterExpr: (cd_demo_sk is not null and (cd_gender = 'M') and (cd_education_status = 'College')) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_customer_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (cd_demo_sk is not null and (cd_gender = 'M') and (cd_education_status = 'College')) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: cd_demo_sk (type: int), CAST( cd_dep_count AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(12,2)) Execution mode: vectorized - Map 9 + Map 5 Map Operator Tree: TableScan alias: cd2 @@ -241,23 +220,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col10 (type: decimal(12,2)) + Map 8 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: (ca_address_sk is not null and (ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN')) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and (ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN')) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string), ca_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized Reducer 12 Reduce Operator Tree: Join Operator @@ -266,83 +249,45 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col11 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col10 (type: decimal(12,2)) - outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col9 (type: decimal(12,2)), _col11 (type: decimal(12,2)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col5 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col7 (type: decimal(12,2)) - Reducer 3 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col9 (type: decimal(12,2)), _col11 (type: decimal(12,2)) + Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col5 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col7 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(12,2)) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col1, _col2, _col3, _col7, _col13, _col14, _col15, _col16, _col17, _col18, _col20 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col13 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col13 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(12,2)), _col14 (type: decimal(12,2)), _col15 (type: decimal(12,2)), _col16 (type: decimal(12,2)), _col17 (type: decimal(12,2)), _col18 (type: decimal(12,2)), _col20 (type: decimal(12,2)) - Reducer 5 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)), _col5 (type: decimal(12,2)), _col6 (type: decimal(12,2)), _col7 (type: decimal(12,2)), _col8 (type: decimal(12,2)), _col10 (type: decimal(12,2)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col13 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col7, _col14, _col15, _col16, _col17, _col18, _col20, _col22 + 0 _col0 (type: int) + 1 _col13 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col10, _col16, _col17, _col18, _col19, _col20, _col22 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col14), count(_col14), sum(_col15), count(_col15), sum(_col16), count(_col16), sum(_col17), count(_col17), sum(_col18), count(_col18), sum(_col7), count(_col7), sum(_col20), count(_col20) - keys: _col22 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), 0L (type: bigint) + aggregations: sum(_col16), count(_col16), sum(_col17), count(_col17), sum(_col18), count(_col18), sum(_col19), count(_col19), sum(_col20), count(_col20), sum(_col10), count(_col10), sum(_col22), count(_col22) + keys: _col1 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 @@ -354,7 +299,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(22,2)), _col6 (type: bigint), _col7 (type: decimal(22,2)), _col8 (type: bigint), _col9 (type: decimal(22,2)), _col10 (type: bigint), _col11 (type: decimal(22,2)), _col12 (type: bigint), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: decimal(22,2)), _col16 (type: bigint), _col17 (type: decimal(22,2)), _col18 (type: bigint) - Reducer 6 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -375,7 +320,7 @@ STAGE PLANS: Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col4 (type: decimal(16,6)), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)) - Reducer 7 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -392,6 +337,57 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col2, _col3, _col4, _col5, _col8 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col8 (type: decimal(12,2)) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col3, _col4, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col20 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col13 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col8 (type: decimal(12,2)), _col14 (type: decimal(12,2)), _col15 (type: decimal(12,2)), _col16 (type: decimal(12,2)), _col17 (type: decimal(12,2)), _col18 (type: decimal(12,2)), _col20 (type: decimal(12,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col7 (type: decimal(12,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out index fded31980f..874cd89fdd 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out @@ -70,7 +70,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 1 Map Operator Tree: TableScan alias: store @@ -85,8 +85,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col12 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -94,15 +94,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 846), Reducer 4 (PARTITION-LEVEL SORT, 846) - Reducer 6 <- Reducer 5 (GROUP, 640) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 12 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 846), Reducer 7 (PARTITION-LEVEL SORT, 846) + Reducer 4 <- Reducer 3 (GROUP, 640) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 440), Reducer 11 (PARTITION-LEVEL SORT, 440) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan alias: store_sales @@ -123,28 +123,27 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 10 + Map 12 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 2 Map Operator Tree: TableScan alias: customer_address @@ -165,27 +164,28 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 8 + Map 6 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan alias: item @@ -206,7 +206,7 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string) Execution mode: vectorized - Reducer 2 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -224,40 +224,6 @@ STAGE PLANS: Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col7, _col8, _col9, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col12 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) - Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -265,29 +231,29 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col12 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col14 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col12, _col13 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col7, _col8, _col9, _col10, _col14, _col16 + 0 _col0 (type: int) + 1 _col12 (type: int) + outputColumnNames: _col1, _col3, _col7, _col8, _col9, _col10, _col15 input vertices: - 1 Map 12 + 0 Map 1 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col14 <> _col16) (type: boolean) + predicate: (_col3 <> _col1) (type: boolean) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string) - outputColumnNames: _col4, _col7, _col8, _col9, _col10 + expressions: _col7 (type: int), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col15 (type: decimal(7,2)) + outputColumnNames: _col7, _col8, _col9, _col10, _col15 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) + aggregations: sum(_col15) keys: _col8 (type: string), _col7 (type: int), _col9 (type: int), _col10 (type: string) minReductionHashAggr: 0.99 mode: hash @@ -300,7 +266,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -319,7 +285,7 @@ STAGE PLANS: sort order: -++++ Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 7 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -336,6 +302,40 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col7 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col10, _col11 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col7 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col8 (type: int), _col9 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out index 1b7b6771db..fa923a5f28 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -76,42 +76,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: warehouse + filterExpr: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 7 Map Operator Tree: TableScan - alias: warehouse - filterExpr: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -120,12 +120,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 5 (PARTITION-LEVEL SORT, 6) - Reducer 3 <- Reducer 2 (GROUP, 7) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) + Reducer 4 <- Reducer 3 (GROUP, 7) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: inventory @@ -146,7 +146,7 @@ STAGE PLANS: Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: item @@ -167,7 +167,7 @@ STAGE PLANS: Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -187,20 +187,20 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col5, _col7, _col8 input vertices: - 1 Map 6 + 1 Map 7 Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col7, _col8, _col10 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col5, _col7, _col9, _col10 input vertices: - 1 Map 7 + 0 Map 1 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col10 (type: string), _col5 (type: string), CASE WHEN (_col7) THEN (_col3) ELSE (0) END (type: int), CASE WHEN (_col8) THEN (_col3) ELSE (0) END (type: int) + expressions: _col1 (type: string), _col7 (type: string), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: int), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -217,7 +217,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -236,7 +236,7 @@ STAGE PLANS: Statistics: Num rows: 6253038 Data size: 98796990 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query22.q.out b/ql/src/test/results/clientpositive/perf/spark/query22.q.out index ade79e6f89..71ba04c4aa 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query22.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 5 Map Operator Tree: TableScan alias: warehouse @@ -75,8 +75,8 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -84,13 +84,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 11), Reducer 2 (PARTITION-LEVEL SORT, 11) - Reducer 4 <- Reducer 3 (GROUP, 31) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 11), Reducer 7 (PARTITION-LEVEL SORT, 11) + Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 6), Map 8 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: inventory @@ -111,7 +132,7 @@ STAGE PLANS: Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 6 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -131,69 +152,19 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 7 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col7, _col8, _col9, _col10 + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col9 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3), count(_col3) - keys: _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), 0L (type: bigint) + aggregations: sum(_col9), count(_col9) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -205,7 +176,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint), _col6 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -225,7 +196,7 @@ STAGE PLANS: sort order: +++++ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -242,6 +213,35 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + input vertices: + 0 Map 5 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out index 07e9323b4c..652fd4adfa 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out @@ -1,5 +1,5 @@ -Warning: Map Join MAPJOIN[210][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[211][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[216][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[217][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -126,13 +126,33 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 398), Map 22 (PARTITION-LEVEL SORT, 398) - Reducer 19 <- Map 23 (PARTITION-LEVEL SORT, 975), Reducer 18 (PARTITION-LEVEL SORT, 975) - Reducer 20 <- Reducer 19 (GROUP, 481) - Reducer 21 <- Reducer 20 (GROUP, 1) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975) + Reducer 19 <- Reducer 18 (GROUP, 481) + Reducer 20 <- Reducer 19 (GROUP, 1) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 23 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 17 + Map Operator Tree: + TableScan + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 21 Map Operator Tree: TableScan alias: store_sales @@ -153,7 +173,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(18,2)) Execution mode: vectorized - Map 22 + Map 23 Map Operator Tree: TableScan alias: date_dim @@ -173,26 +193,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 23 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 18 Reduce Operator Tree: Join Operator @@ -200,29 +200,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(18,2)) - Reducer 19 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 1 _col1 (type: int) + outputColumnNames: _col0, _col3 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: int) + aggregations: sum(_col3) + keys: _col0 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -234,7 +217,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(28,2)) - Reducer 20 + Reducer 19 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -258,7 +241,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 21 + Reducer 20 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -279,17 +262,54 @@ STAGE PLANS: keys: 0 1 + Reducer 22 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(18,2)) Stage: Stage-3 Spark Edges: - Reducer 40 <- Map 39 (PARTITION-LEVEL SORT, 398), Map 44 (PARTITION-LEVEL SORT, 398) - Reducer 41 <- Map 45 (PARTITION-LEVEL SORT, 975), Reducer 40 (PARTITION-LEVEL SORT, 975) - Reducer 42 <- Reducer 41 (GROUP, 481) - Reducer 43 <- Reducer 42 (GROUP, 1) + Reducer 40 <- Map 39 (PARTITION-LEVEL SORT, 975), Reducer 44 (PARTITION-LEVEL SORT, 975) + Reducer 41 <- Reducer 40 (GROUP, 481) + Reducer 42 <- Reducer 41 (GROUP, 1) + Reducer 44 <- Map 43 (PARTITION-LEVEL SORT, 398), Map 45 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 39 + Map Operator Tree: + TableScan + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 43 Map Operator Tree: TableScan alias: store_sales @@ -310,7 +330,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(18,2)) Execution mode: vectorized - Map 44 + Map 45 Map Operator Tree: TableScan alias: date_dim @@ -330,26 +350,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 45 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 40 Reduce Operator Tree: Join Operator @@ -357,29 +357,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(18,2)) - Reducer 41 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 1 _col1 (type: int) + outputColumnNames: _col0, _col3 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col4 (type: int) + aggregations: sum(_col3) + keys: _col0 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -391,7 +374,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(28,2)) - Reducer 42 + Reducer 41 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -415,7 +398,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 43 + Reducer 42 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -436,26 +419,64 @@ STAGE PLANS: keys: 0 1 + Reducer 44 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(18,2)) Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 31 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 305), Map 12 (PARTITION-LEVEL SORT, 305) Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 935), Map 16 (PARTITION-LEVEL SORT, 935) Reducer 15 <- Reducer 14 (GROUP, 437) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 305), Map 28 (PARTITION-LEVEL SORT, 305) - Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 154), Map 28 (PARTITION-LEVEL SORT, 154) - Reducer 26 <- Reducer 10 (PARTITION-LEVEL SORT, 209), Reducer 25 (PARTITION-LEVEL SORT, 209) - Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 444), Reducer 37 (PARTITION-LEVEL SORT, 444) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 376), Reducer 2 (PARTITION-LEVEL SORT, 376) - Reducer 30 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 29 (PARTITION-LEVEL SORT, 398) - Reducer 31 <- Map 12 (PARTITION-LEVEL SORT, 442), Reducer 30 (PARTITION-LEVEL SORT, 442) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 442), Reducer 30 (PARTITION-LEVEL SORT, 442) + Reducer 26 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 209), Reducer 33 (PARTITION-LEVEL SORT, 209) + Reducer 28 <- Reducer 27 (PARTITION-LEVEL SORT, 444), Reducer 37 (PARTITION-LEVEL SORT, 444) + Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 398), Map 31 (PARTITION-LEVEL SORT, 398) + Reducer 33 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 32 (PARTITION-LEVEL SORT, 154) Reducer 37 <- Reducer 14 (GROUP, 437) - Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT, 628), Reducer 3 (PARTITION-LEVEL SORT, 628) - Reducer 5 <- Reducer 27 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 376), Reducer 26 (PARTITION-LEVEL SORT, 376) + Reducer 5 <- Reducer 15 (PARTITION-LEVEL SORT, 628), Reducer 4 (PARTITION-LEVEL SORT, 628) + Reducer 6 <- Reducer 28 (GROUP, 1), Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), substr(i_item_desc, 1, 30) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: catalog_sales @@ -476,47 +497,25 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_date (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), substr(i_item_desc, 1, 30) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 13 Map Operator Tree: @@ -559,99 +558,86 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 24 + Map 29 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Map 28 + Map 31 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 29 + Map 32 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_item_sk is not null and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_bill_customer_sk (type: int), ws_quantity (type: int), ws_list_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Reducer 10 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized + Reducer 11 Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: bigint) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 > 4L) (type: boolean) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 14 Reduce Operator Tree: Join Operator @@ -698,7 +684,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 input vertices: - 1 Reducer 21 + 1 Reducer 20 Statistics: Num rows: 316797606 Data size: 99227438104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > _col2) (type: boolean) @@ -726,59 +712,80 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 25 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int), _col1 (type: string), _col5 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) Reducer 26 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: bigint) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 > 4L) (type: boolean) + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 58079562 Data size: 5123795819 Basic stats: COMPLETE Column stats: NONE + Reducer 27 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4, _col5 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col3 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 27 + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)) + Reducer 28 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 - outputColumnNames: _col3, _col4 + outputColumnNames: _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (CAST( _col3 AS decimal(10,0)) * _col4) (type: decimal(18,2)) + expressions: (CAST( _col4 AS decimal(10,0)) * _col5) (type: decimal(18,2)) outputColumnNames: _col0 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -792,23 +799,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 30 Reduce Operator Tree: Join Operator @@ -826,30 +816,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) - Reducer 31 + Reducer 33 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col4 (type: int), _col5 (type: string), _col3 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 37 Execution mode: vectorized Local Work: @@ -872,7 +855,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2 input vertices: - 1 Reducer 43 + 1 Reducer 42 Statistics: Num rows: 316797606 Data size: 99227438104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > _col2) (type: boolean) @@ -894,14 +877,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 105599202 Data size: 33075812701 Basic stats: COMPLETE Column stats: NONE Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col4, _col5 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(7,2)) + Reducer 5 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 - outputColumnNames: _col3, _col4 + outputColumnNames: _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (CAST( _col3 AS decimal(10,0)) * _col4) (type: decimal(18,2)) + expressions: (CAST( _col4 AS decimal(10,0)) * _col5) (type: decimal(18,2)) outputColumnNames: _col0 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -915,7 +915,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(28,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 99b477b194..e38e51f41e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[110][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[117][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -125,7 +125,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 19 + Map 23 Map Operator Tree: TableScan alias: store @@ -149,68 +149,57 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 472), Map 20 (PARTITION-LEVEL SORT, 472) - Reducer 14 <- Map 21 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009) - Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 846), Reducer 14 (PARTITION-LEVEL SORT, 846) - Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 587), Reducer 15 (PARTITION-LEVEL SORT, 587) - Reducer 17 <- Reducer 16 (GROUP, 640) - Reducer 18 <- Reducer 17 (GROUP, 1) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 587), Reducer 17 (PARTITION-LEVEL SORT, 587) + Reducer 14 <- Reducer 13 (GROUP, 640) + Reducer 15 <- Reducer 14 (GROUP, 1) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 846), Reducer 19 (PARTITION-LEVEL SORT, 846) + Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 472), Map 22 (PARTITION-LEVEL SORT, 472) #### A masked pattern was here #### Vertices: Map 12 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8 - input vertices: - 1 Map 19 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) + expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_color (type: string), i_units (type: string), i_manager_id (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 20 + Map 16 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), upper(ca_country) (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) + key expressions: _col0 (type: int), _col2 (type: string) null sort order: zz sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string) Execution mode: vectorized - Map 21 + Map 18 Map Operator Tree: TableScan alias: customer @@ -231,115 +220,72 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized - Map 22 + Map 20 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ca_address_sk is not null and ca_zip is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string), ca_zip (type: string), upper(ca_country) (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: sr_item_sk (type: int), sr_ticket_number (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: string) + key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 23 + Map 22 Map Operator Tree: TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_size (type: string), i_color (type: string), i_units (type: string), i_manager_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int) + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8 + input vertices: + 1 Map 23 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) Execution mode: vectorized + Local Work: + Map Reduce Local Work Reducer 13 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col4, _col6, _col7, _col8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col12, _col13, _col14, _col15 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col12 (type: int), _col8 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col12 (type: int), _col8 (type: string) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string) - Reducer 15 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col12 (type: int), _col8 (type: string) - 1 _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col4, _col6, _col7, _col13, _col14, _col15, _col17, _col19 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col15 <> _col19) (type: boolean) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col13 (type: string), _col14 (type: string), _col17 (type: string) - Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col7, _col13, _col14, _col17, _col21, _col22, _col23, _col24, _col25 + 1 _col11 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col12, _col13, _col21, _col23, _col24 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) - keys: _col21 (type: decimal(7,2)), _col22 (type: string), _col23 (type: string), _col24 (type: string), _col25 (type: int), _col17 (type: string), _col13 (type: string), _col14 (type: string), _col6 (type: string), _col7 (type: string) + aggregations: sum(_col21) + keys: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: string), _col12 (type: string), _col13 (type: string), _col23 (type: string), _col24 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -351,7 +297,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col10 (type: decimal(17,2)) - Reducer 17 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -375,7 +321,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(27,2)), _col1 (type: bigint) - Reducer 18 + Reducer 15 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -396,12 +342,70 @@ STAGE PLANS: keys: 0 1 + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: string) + 1 _col1 (type: int), _col15 (type: string) + outputColumnNames: _col1, _col3, _col6, _col7, _col8, _col11, _col15, _col17, _col18 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col8 <> _col3) (type: boolean) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col6 (type: string), _col7 (type: string), _col11 (type: int), _col15 (type: decimal(7,2)), _col17 (type: string), _col18 (type: string) + outputColumnNames: _col1, _col6, _col7, _col11, _col15, _col17, _col18 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col11 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col6 (type: string), _col7 (type: string), _col15 (type: decimal(7,2)), _col17 (type: string), _col18 (type: string) + Reducer 19 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col11, _col13, _col14, _col15 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col15 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col15 (type: string) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: int), _col11 (type: decimal(7,2)), _col13 (type: string), _col14 (type: string) + Reducer 21 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col3 (type: int) + outputColumnNames: _col2, _col3, _col6, _col8, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col10 (type: string) Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 10 Map Operator Tree: TableScan alias: store @@ -425,68 +429,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Map 8 (PARTITION-LEVEL SORT, 440) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 516), Reducer 2 (PARTITION-LEVEL SORT, 516) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 899), Reducer 4 (PARTITION-LEVEL SORT, 899) - Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 640) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 899), Reducer 5 (PARTITION-LEVEL SORT, 899) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 640) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1009), Reducer 7 (PARTITION-LEVEL SORT, 1009) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 516), Reducer 9 (PARTITION-LEVEL SORT, 516) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 440), Map 8 (PARTITION-LEVEL SORT, 440) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8 - input vertices: - 1 Map 7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 10 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Execution mode: vectorized - Map 11 Map Operator Tree: TableScan alias: customer_address @@ -507,7 +457,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col3 (type: string) Execution mode: vectorized - Map 8 + Map 11 Map Operator Tree: TableScan alias: item @@ -528,7 +478,28 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: string), _col3 (type: string), _col4 (type: int) Execution mode: vectorized - Map 9 + Map 4 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string), c_birth_country (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: store_returns @@ -548,77 +519,59 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_ticket_number is not null and ss_item_sk is not null and ss_store_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8 + input vertices: + 1 Map 10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) + Execution mode: vectorized + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col3 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col3 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col3 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col17, _col18, _col19, _col20 - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col17 (type: int), _col8 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col17 (type: int), _col8 (type: string) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col17 (type: int), _col8 (type: string) - 1 _col0 (type: int), _col2 (type: string) - outputColumnNames: _col4, _col6, _col7, _col10, _col11, _col12, _col13, _col18, _col19, _col20, _col22, _col24 + 0 _col0 (type: int), _col2 (type: string) + 1 _col1 (type: int), _col15 (type: string) + outputColumnNames: _col1, _col3, _col6, _col7, _col8, _col15, _col17, _col18, _col21, _col22, _col23, _col24 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col20 <> _col24) (type: boolean) + predicate: (_col8 <> _col3) (type: boolean) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col18 (type: string), _col19 (type: string), _col22 (type: string) - outputColumnNames: _col4, _col6, _col7, _col10, _col11, _col12, _col13, _col18, _col19, _col22 + expressions: _col1 (type: string), _col6 (type: string), _col7 (type: string), _col15 (type: decimal(7,2)), _col17 (type: string), _col18 (type: string), _col21 (type: decimal(7,2)), _col22 (type: string), _col23 (type: string), _col24 (type: int) + outputColumnNames: _col1, _col6, _col7, _col15, _col17, _col18, _col21, _col22, _col23, _col24 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4) - keys: _col18 (type: string), _col19 (type: string), _col6 (type: string), _col22 (type: string), _col7 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) + aggregations: sum(_col15) + keys: _col6 (type: string), _col7 (type: string), _col17 (type: string), _col1 (type: string), _col18 (type: string), _col21 (type: decimal(7,2)), _col22 (type: string), _col23 (type: string), _col24 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -630,7 +583,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col9 (type: decimal(17,2)) - Reducer 6 + Reducer 3 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -666,7 +619,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 1 Reducer 18 + 1 Reducer 15 Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col3 > _col4) (type: boolean) @@ -682,6 +635,57 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col11, _col13, _col14, _col15, _col17, _col18, _col19, _col20 + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col15 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col15 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col11 (type: decimal(7,2)), _col13 (type: string), _col14 (type: string), _col17 (type: decimal(7,2)), _col18 (type: string), _col19 (type: string), _col20 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col3 (type: int) + outputColumnNames: _col3, _col6, _col8, _col9, _col10, _col12, _col13, _col14, _col15 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col12 (type: decimal(7,2)), _col13 (type: string), _col14 (type: string), _col15 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col6, _col7, _col8, _col10, _col11, _col12, _col13 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col3 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col3 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col4 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col10 (type: decimal(7,2)), _col11 (type: string), _col12 (type: string), _col13 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index fa3c571c96..480611ffff 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 1 Map Operator Tree: TableScan alias: store @@ -131,8 +131,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col6 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -140,38 +140,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 13 (PARTITION-LEVEL SORT, 306) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 374), Reducer 15 (PARTITION-LEVEL SORT, 374) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 36), Map 16 (PARTITION-LEVEL SORT, 36) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 442), Reducer 8 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: d3 @@ -191,7 +191,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: store_returns @@ -212,7 +212,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: d2 @@ -232,27 +232,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 + Map 2 Map Operator Tree: TableScan alias: item @@ -273,28 +253,48 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: d1 + filterExpr: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -311,7 +311,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int), _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)) - Reducer 11 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -328,7 +328,7 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: int), _col7 (type: int), _col8 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)), _col9 (type: decimal(7,2)) - Reducer 14 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -345,40 +345,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col8, _col9 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + key expressions: _col4 (type: int), _col5 (type: int), _col7 (type: int) null sort order: zzz sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Map-reduce partition columns: _col4 (type: int), _col5 (type: int), _col7 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col6 (type: int), _col8 (type: decimal(7,2)) Reducer 4 Local Work: Map Reduce Local Work @@ -387,23 +370,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 0 _col4 (type: int), _col5 (type: int), _col7 (type: int) 1 _col6 (type: int), _col7 (type: int), _col8 (type: int) - outputColumnNames: _col3, _col5, _col8, _col9, _col13, _col19 + outputColumnNames: _col1, _col2, _col6, _col8, _col13, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col8, _col9, _col13, _col19, _col22, _col23 + 0 _col0 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col11, _col16, _col22 input vertices: - 1 Map 16 + 0 Map 1 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5), sum(_col19), sum(_col13) - keys: _col8 (type: string), _col9 (type: string), _col22 (type: string), _col23 (type: string) + aggregations: sum(_col11), sum(_col22), sum(_col16) + keys: _col4 (type: string), _col5 (type: string), _col1 (type: string), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -449,6 +432,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index 9e65060a25..999254b521 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 5 Map Operator Tree: TableScan alias: promotion @@ -75,8 +75,8 @@ STAGE PLANS: Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -84,35 +84,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 336), Reducer 2 (PARTITION-LEVEL SORT, 336) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 411), Reducer 3 (PARTITION-LEVEL SORT, 411) - Reducer 5 <- Reducer 4 (GROUP, 447) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 411), Reducer 7 (PARTITION-LEVEL SORT, 411) + Reducer 3 <- Reducer 2 (GROUP, 447) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 336), Reducer 9 (PARTITION-LEVEL SORT, 336) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: item @@ -133,7 +112,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 7 + Map 10 Map Operator Tree: TableScan alias: customer_demographics @@ -153,7 +132,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -173,65 +152,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 3 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 9 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col12 + 1 _col4 (type: int) + outputColumnNames: _col1, _col8, _col9, _col10, _col11 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col12 (type: string) + aggregations: sum(_col8), count(_col8), sum(_col9), count(_col9), sum(_col11), count(_col11), sum(_col10), count(_col10) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -244,7 +198,7 @@ STAGE PLANS: Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -264,7 +218,7 @@ STAGE PLANS: Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -281,6 +235,52 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col4, _col6, _col7, _col8, _col9 + input vertices: + 0 Map 5 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out index 7192f8dec7..df2fefb96d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out @@ -64,7 +64,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 5 Map Operator Tree: TableScan alias: store @@ -79,8 +79,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -88,35 +88,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 534), Reducer 3 (PARTITION-LEVEL SORT, 534) - Reducer 5 <- Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 534), Reducer 7 (PARTITION-LEVEL SORT, 534) + Reducer 3 <- Reducer 2 (GROUP, 1009) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 438), Reducer 9 (PARTITION-LEVEL SORT, 438) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: item @@ -137,7 +116,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 7 + Map 10 Map Operator Tree: TableScan alias: customer_demographics @@ -157,7 +136,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -177,64 +156,39 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 3 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col11 - input vertices: - 1 Map 9 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: string) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col13 + 1 _col4 (type: int) + outputColumnNames: _col1, _col3, _col9, _col10, _col11, _col12 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col13 (type: string), _col11 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + expressions: _col1 (type: string), _col3 (type: string), _col9 (type: int), _col10 (type: decimal(7,2)), _col12 (type: decimal(7,2)), _col11 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -252,7 +206,7 @@ STAGE PLANS: Statistics: Num rows: 2529945843 Data size: 223192556868 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint), _col9 (type: decimal(17,2)), _col10 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -272,7 +226,7 @@ STAGE PLANS: Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)), _col6 (type: decimal(11,6)) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -289,6 +243,52 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col4, _col7, _col8, _col9, _col10 + input vertices: + 0 Map 5 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col7 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index c1ce7c7c98..fc4be4b4f7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 1 Map Operator Tree: TableScan alias: store @@ -129,8 +129,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col6 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -138,38 +138,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 374), Reducer 14 (PARTITION-LEVEL SORT, 374) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 36), Map 15 (PARTITION-LEVEL SORT, 36) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 13 (PARTITION-LEVEL SORT, 306) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 374), Reducer 15 (PARTITION-LEVEL SORT, 374) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 36), Map 16 (PARTITION-LEVEL SORT, 36) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 442), Reducer 8 (PARTITION-LEVEL SORT, 442) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 850), Reducer 3 (PARTITION-LEVEL SORT, 850) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: d3 @@ -189,7 +189,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: store_returns @@ -210,7 +210,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: d2 @@ -230,27 +230,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 + Map 2 Map Operator Tree: TableScan alias: item @@ -271,28 +251,48 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: d1 + filterExpr: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -309,7 +309,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int), _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int) - Reducer 11 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -326,7 +326,7 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: int), _col7 (type: int), _col8 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col9 (type: int) - Reducer 14 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -343,40 +343,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col8, _col9 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) + key expressions: _col4 (type: int), _col5 (type: int), _col7 (type: int) null sort order: zzz sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col4 (type: int) + Map-reduce partition columns: _col4 (type: int), _col5 (type: int), _col7 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col8 (type: string), _col9 (type: string) + value expressions: _col1 (type: string), _col2 (type: string), _col6 (type: int), _col8 (type: int) Reducer 4 Local Work: Map Reduce Local Work @@ -385,23 +368,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 0 _col4 (type: int), _col5 (type: int), _col7 (type: int) 1 _col6 (type: int), _col7 (type: int), _col8 (type: int) - outputColumnNames: _col3, _col5, _col8, _col9, _col13, _col19 + outputColumnNames: _col1, _col2, _col6, _col8, _col13, _col19 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col8, _col9, _col13, _col19, _col22, _col23 + 0 _col0 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col11, _col16, _col22 input vertices: - 1 Map 16 + 0 Map 1 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5), sum(_col19), sum(_col13) - keys: _col8 (type: string), _col9 (type: string), _col22 (type: string), _col23 (type: string) + aggregations: sum(_col11), sum(_col22), sum(_col16) + keys: _col4 (type: string), _col5 (type: string), _col1 (type: string), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -447,6 +430,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query30.q.out b/ql/src/test/results/clientpositive/perf/spark/query30.q.out index 24cf5f6160..b9fe201643 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query30.q.out @@ -76,16 +76,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 262), Reducer 9 (PARTITION-LEVEL SORT, 262) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 11), Map 17 (PARTITION-LEVEL SORT, 11) - Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 329), Reducer 14 (PARTITION-LEVEL SORT, 329) - Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 349) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 11), Map 12 (PARTITION-LEVEL SORT, 11) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 329), Reducer 17 (PARTITION-LEVEL SORT, 329) + Reducer 15 <- Reducer 14 (GROUP PARTITION-LEVEL SORT, 349) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 11), Map 18 (PARTITION-LEVEL SORT, 11) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 656), Reducer 2 (PARTITION-LEVEL SORT, 656) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 656), Reducer 9 (PARTITION-LEVEL SORT, 656) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 11), Map 6 (PARTITION-LEVEL SORT, 11) - Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 329), Reducer 7 (PARTITION-LEVEL SORT, 329) - Reducer 9 <- Reducer 8 (GROUP, 349) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 329), Reducer 11 (PARTITION-LEVEL SORT, 329) + Reducer 8 <- Reducer 7 (GROUP, 349) + Reducer 9 <- Reducer 15 (PARTITION-LEVEL SORT, 262), Reducer 8 (PARTITION-LEVEL SORT, 262) #### A masked pattern was here #### Vertices: Map 1 @@ -109,7 +109,28 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string) Execution mode: vectorized - Map 11 + Map 10 + Map Operator Tree: + TableScan + alias: web_returns + filterExpr: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wr_returned_date_sk (type: int), wr_returning_customer_sk (type: int), wr_returning_addr_sk (type: int), wr_return_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 12 Map Operator Tree: TableScan alias: date_dim @@ -129,7 +150,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: customer_address @@ -150,7 +171,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 13 + Map 16 Map Operator Tree: TableScan alias: web_returns @@ -171,7 +192,7 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 17 + Map 18 Map Operator Tree: TableScan alias: date_dim @@ -191,27 +212,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 18 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -235,49 +235,25 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: web_returns - filterExpr: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_returned_date_sk is not null and wr_returning_addr_sk is not null and wr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_returned_date_sk (type: int), wr_returning_customer_sk (type: int), wr_returning_addr_sk (type: int), wr_return_amt (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 > _col3) (type: boolean) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Reducer 14 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -294,19 +270,19 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 15 + Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col6 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col3, _col5 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col6 (type: string), _col1 (type: int) + aggregations: sum(_col5) + keys: _col1 (type: string), _col3 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -318,7 +294,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 16 + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -351,6 +327,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(24,7)) + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -412,29 +405,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col6 + 1 _col2 (type: int) + outputColumnNames: _col1, _col3, _col5 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col6 (type: string), _col1 (type: int) + aggregations: sum(_col5) + keys: _col1 (type: string), _col3 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -446,7 +422,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 9 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -469,6 +445,30 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 > _col3) (type: boolean) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query31.q.out b/ql/src/test/results/clientpositive/perf/spark/query31.q.out index 51995f67c8..d716f2e46e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query31.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query31.q.out @@ -118,29 +118,50 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) - Reducer 11 <- Reducer 10 (GROUP, 186) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 154), Map 18 (PARTITION-LEVEL SORT, 154) - Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 486), Reducer 15 (PARTITION-LEVEL SORT, 486) - Reducer 17 <- Reducer 16 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 6 (PARTITION-LEVEL SORT, 154) - Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) - Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 754), Reducer 21 (PARTITION-LEVEL SORT, 754) - Reducer 23 <- Reducer 22 (GROUP, 481) - Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 721), Reducer 30 (PARTITION-LEVEL SORT, 721), Reducer 36 (PARTITION-LEVEL SORT, 721) - Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 398), Map 31 (PARTITION-LEVEL SORT, 398) - Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 754), Reducer 28 (PARTITION-LEVEL SORT, 754) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 486), Reducer 2 (PARTITION-LEVEL SORT, 486) - Reducer 30 <- Reducer 29 (GROUP, 481) - Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 398), Map 37 (PARTITION-LEVEL SORT, 398) - Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 754), Reducer 34 (PARTITION-LEVEL SORT, 754) - Reducer 36 <- Reducer 35 (GROUP, 481) - Reducer 4 <- Reducer 3 (GROUP, 186) - Reducer 5 <- Reducer 11 (PARTITION-LEVEL SORT, 807), Reducer 17 (PARTITION-LEVEL SORT, 807), Reducer 24 (PARTITION-LEVEL SORT, 807), Reducer 4 (PARTITION-LEVEL SORT, 807) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 8 (PARTITION-LEVEL SORT, 154) + Reducer 10 <- Reducer 9 (GROUP, 186) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 154), Map 13 (PARTITION-LEVEL SORT, 154) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 486), Reducer 18 (PARTITION-LEVEL SORT, 486) + Reducer 16 <- Reducer 15 (GROUP, 186) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 486), Reducer 6 (PARTITION-LEVEL SORT, 486) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 754), Reducer 25 (PARTITION-LEVEL SORT, 754) + Reducer 22 <- Reducer 21 (GROUP, 481) + Reducer 23 <- Reducer 22 (PARTITION-LEVEL SORT, 721), Reducer 29 (PARTITION-LEVEL SORT, 721), Reducer 35 (PARTITION-LEVEL SORT, 721) + Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 398), Map 26 (PARTITION-LEVEL SORT, 398) + Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 754), Reducer 31 (PARTITION-LEVEL SORT, 754) + Reducer 29 <- Reducer 28 (GROUP, 481) + Reducer 3 <- Reducer 2 (GROUP, 186) + Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 398), Map 32 (PARTITION-LEVEL SORT, 398) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 754), Reducer 37 (PARTITION-LEVEL SORT, 754) + Reducer 35 <- Reducer 34 (GROUP, 481) + Reducer 37 <- Map 36 (PARTITION-LEVEL SORT, 398), Map 38 (PARTITION-LEVEL SORT, 398) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 807), Reducer 16 (PARTITION-LEVEL SORT, 807), Reducer 23 (PARTITION-LEVEL SORT, 807), Reducer 3 (PARTITION-LEVEL SORT, 807) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 486), Reducer 12 (PARTITION-LEVEL SORT, 486) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Map 11 Map Operator Tree: TableScan alias: web_sales @@ -161,7 +182,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -181,7 +202,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: customer_address @@ -202,7 +223,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 14 + Map 17 Map Operator Tree: TableScan alias: web_sales @@ -223,7 +244,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 18 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -243,7 +264,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 19 + Map 20 Map Operator Tree: TableScan alias: customer_address @@ -264,7 +285,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 20 + Map 24 Map Operator Tree: TableScan alias: store_sales @@ -285,7 +306,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 25 + Map 26 Map Operator Tree: TableScan alias: date_dim @@ -305,7 +326,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 26 + Map 27 Map Operator Tree: TableScan alias: customer_address @@ -326,7 +347,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 27 + Map 30 Map Operator Tree: TableScan alias: store_sales @@ -347,7 +368,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 31 + Map 32 Map Operator Tree: TableScan alias: date_dim @@ -367,7 +388,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 32 + Map 33 Map Operator Tree: TableScan alias: customer_address @@ -388,7 +409,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 33 + Map 36 Map Operator Tree: TableScan alias: store_sales @@ -409,7 +430,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 37 + Map 38 Map Operator Tree: TableScan alias: date_dim @@ -429,28 +450,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 38 + Map 5 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_county is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ca_address_sk is not null and ca_county is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -470,7 +491,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -491,52 +512,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: web_sales - filterExpr: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ws_sold_date_sk is not null and ws_bill_addr_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_addr_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -552,7 +528,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 15 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -569,19 +545,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 16 + Reducer 15 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -593,7 +569,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 17 + Reducer 16 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -613,7 +589,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: boolean) - Reducer 2 + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -630,36 +606,43 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 21 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 22 + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + keys: _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 21 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -671,7 +654,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 23 + Reducer 22 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -687,7 +670,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 24 + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -706,7 +689,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 766650239 Data size: 67634106674 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col5 (type: decimal(17,2)) - Reducer 28 + Reducer 25 Reduce Operator Tree: Join Operator condition map: @@ -723,19 +706,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 29 + Reducer 28 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -747,31 +730,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 30 + Reducer 29 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -787,7 +746,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 34 + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (_col1 > 0) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: boolean) + Reducer 31 Reduce Operator Tree: Join Operator condition map: @@ -804,19 +783,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 35 + Reducer 34 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -828,7 +807,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 36 + Reducer 35 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -844,27 +823,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 4 - Execution mode: vectorized + Reducer 37 Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (_col1 > 0) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: boolean) - Reducer 5 + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -892,7 +868,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -909,6 +885,30 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + keys: _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query33.q.out b/ql/src/test/results/clientpositive/perf/spark/query33.q.out index 8d80e83707..9c7feb2d19 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query33.q.out @@ -168,21 +168,21 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596) + Reducer 10 <- Map 20 (PARTITION-LEVEL SORT, 596), Reducer 12 (PARTITION-LEVEL SORT, 596) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 7), Reducer 19 (PARTITION-LEVEL SORT, 7) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 21 (PARTITION-LEVEL SORT, 375) Reducer 17 <- Reducer 16 (GROUP, 406) Reducer 19 <- Map 18 (GROUP, 3) - Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305) - Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 494), Reducer 23 (PARTITION-LEVEL SORT, 494) + Reducer 23 <- Map 13 (PARTITION-LEVEL SORT, 305), Map 22 (PARTITION-LEVEL SORT, 305) Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 7), Reducer 30 (PARTITION-LEVEL SORT, 7) - Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191) + Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 32 (PARTITION-LEVEL SORT, 191) Reducer 28 <- Reducer 27 (GROUP, 204) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487) Reducer 30 <- Map 18 (GROUP, 3) - Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154) - Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327) + Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 327), Reducer 34 (PARTITION-LEVEL SORT, 327) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 154), Map 35 (PARTITION-LEVEL SORT, 154) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 17 (GROUP, 569), Reducer 28 (GROUP, 569), Reducer 4 (GROUP, 569) Reducer 6 <- Reducer 5 (SORT, 1) @@ -209,45 +209,46 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 12 + Map 11 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 13 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 18 Map Operator Tree: @@ -276,6 +277,26 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 20 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 22 Map Operator Tree: TableScan alias: catalog_sales @@ -318,6 +339,26 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized Map 31 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 33 Map Operator Tree: TableScan alias: web_sales @@ -338,7 +379,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 34 + Map 35 Map Operator Tree: TableScan alias: date_dim @@ -358,47 +399,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 35 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized Reducer 10 Reduce Operator Tree: Join Operator @@ -406,37 +406,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 11 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 12 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 15 Reduce Operator Tree: Join Operator @@ -516,6 +512,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Reducer 21 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -532,27 +545,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - Reducer 22 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col3, _col4 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) Reducer 26 Reduce Operator Tree: Join Operator @@ -662,37 +654,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 33 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 34 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index ff49373743..72ee5f17dd 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -81,42 +81,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 3 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (false) END) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (false) END) (type: boolean) - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + predicate: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 8 Map Operator Tree: TableScan - alias: store - filterExpr: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (false) END) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (false) END) (type: boolean) + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -125,11 +125,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 5 <- Reducer 4 (GROUP, 529) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 6 <- Reducer 5 (GROUP, 529) #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: store_sales @@ -150,7 +150,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -170,7 +170,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 4 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -190,21 +190,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col4 input vertices: - 1 Map 7 + 1 Map 8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col2, _col5 input vertices: - 1 Map 8 + 0 Map 3 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col1 (type: int), _col4 (type: int) + keys: _col2 (type: int), _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -216,7 +216,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -266,7 +266,7 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 input vertices: - 1 Reducer 5 + 1 Reducer 6 Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint) diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out index f6a3afeb89..1eef49a6b9 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out @@ -139,33 +139,33 @@ STAGE PLANS: Reducer 14 <- Reducer 13 (GROUP, 169) Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 305), Map 19 (PARTITION-LEVEL SORT, 305) Reducer 18 <- Reducer 17 (GROUP, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 544), Map 7 (PARTITION-LEVEL SORT, 544) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 909), Reducer 2 (PARTITION-LEVEL SORT, 909) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 909), Reducer 7 (PARTITION-LEVEL SORT, 909) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 2 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Reducer 3 (GROUP, 1009) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 544), Map 8 (PARTITION-LEVEL SORT, 544) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: c - filterExpr: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: ca + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 11 Map Operator Tree: @@ -269,7 +269,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 6 + Map Operator Tree: + TableScan + alias: c + filterExpr: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: customer_demographics @@ -290,27 +311,6 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: ca - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Map 9 Map Operator Tree: TableScan @@ -446,29 +446,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col8, _col10 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col10 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) + expressions: _col2 (type: int), _col1 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -478,7 +461,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -514,7 +497,7 @@ STAGE PLANS: Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: int), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: int) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -534,7 +517,7 @@ STAGE PLANS: Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: int), _col6 (type: bigint), _col9 (type: double), _col10 (type: int), _col11 (type: bigint), _col14 (type: double), _col15 (type: int), _col16 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -551,6 +534,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query36.q.out b/ql/src/test/results/clientpositive/perf/spark/query36.q.out index 1384de4508..b4ee13e8e0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query36.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query36.q.out @@ -76,7 +76,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 6 Map Operator Tree: TableScan alias: store @@ -91,8 +91,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -100,14 +100,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 486), Reducer 2 (PARTITION-LEVEL SORT, 486) - Reducer 4 <- Reducer 3 (GROUP, 1009) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 793) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 486), Reducer 8 (PARTITION-LEVEL SORT, 486) + Reducer 3 <- Reducer 2 (GROUP, 1009) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 793) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -128,7 +149,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: d1 @@ -148,68 +169,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_class (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4 - input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col8, _col9 + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col7, _col8 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col9 (type: string), _col8 (type: string), _col4 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + expressions: _col2 (type: string), _col1 (type: string), _col8 (type: decimal(7,2)), _col7 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -226,7 +197,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 2299950717 Data size: 202902320028 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -246,7 +217,7 @@ STAGE PLANS: Map-reduce partition columns: (grouping(_col4, 1L) + grouping(_col4, 0L)) (type: bigint), CASE WHEN ((grouping(_col4, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END (type: string) Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -285,7 +256,7 @@ STAGE PLANS: Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(37,20)), _col1 (type: string), _col2 (type: string) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -302,6 +273,35 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col4, _col5 + input vertices: + 0 Map 6 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query38.q.out b/ql/src/test/results/clientpositive/perf/spark/query38.q.out index f52010ff04..c33f43fab7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query38.q.out @@ -62,41 +62,62 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 873), Reducer 10 (PARTITION-LEVEL SORT, 873) - Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 369) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) - Reducer 18 <- Reducer 17 (GROUP PARTITION-LEVEL SORT, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 2 (PARTITION-LEVEL SORT, 975) - Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 481) - Reducer 5 <- Reducer 12 (GROUP, 259), Reducer 18 (GROUP, 259), Reducer 4 (GROUP, 259) - Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 10 <- Map 1 (PARTITION-LEVEL SORT, 873), Reducer 13 (PARTITION-LEVEL SORT, 873) + Reducer 11 <- Reducer 10 (GROUP PARTITION-LEVEL SORT, 369) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 14 (PARTITION-LEVEL SORT, 306) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 706), Reducer 19 (PARTITION-LEVEL SORT, 706) + Reducer 17 <- Reducer 16 (GROUP PARTITION-LEVEL SORT, 186) + Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 154), Map 20 (PARTITION-LEVEL SORT, 154) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Reducer 7 (PARTITION-LEVEL SORT, 975) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 4 <- Reducer 11 (GROUP, 259), Reducer 17 (GROUP, 259), Reducer 3 (GROUP, 259) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Map 14 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 12 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -117,7 +138,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: customer @@ -138,7 +159,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 15 + Map 18 Map Operator Tree: TableScan alias: web_sales @@ -159,7 +180,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 19 + Map 20 Map Operator Tree: TableScan alias: date_dim @@ -180,46 +201,25 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 20 - Map Operator Tree: - TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized - Map 9 + Map 6 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Reducer 10 @@ -229,28 +229,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: string), _col5 (type: string), _col3 (type: string) + keys: _col2 (type: string), _col1 (type: string), _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -261,7 +244,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -293,7 +276,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 16 + Reducer 13 Reduce Operator Tree: Join Operator condition map: @@ -302,26 +285,26 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) - Reducer 17 + Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: string), _col5 (type: string), _col3 (type: string) + keys: _col2 (type: string), _col1 (type: string), _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -332,7 +315,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reducer 18 + Reducer 17 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -364,7 +347,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 2 + Reducer 19 Reduce Operator Tree: Join Operator condition map: @@ -373,26 +356,26 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: string), _col5 (type: string), _col3 (type: string) + keys: _col2 (type: string), _col1 (type: string), _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -403,7 +386,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -435,7 +418,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -464,7 +447,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -479,6 +462,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query39.q.out b/ql/src/test/results/clientpositive/perf/spark/query39.q.out index a50b87aab5..52b2710ba9 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query39.q.out @@ -71,7 +71,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: warehouse @@ -86,8 +86,8 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -96,7 +96,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 10 Map Operator Tree: TableScan alias: warehouse @@ -111,8 +111,8 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -120,38 +120,37 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 5), Map 14 (PARTITION-LEVEL SORT, 5) - Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 11), Reducer 11 (PARTITION-LEVEL SORT, 11) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 11), Reducer 15 (PARTITION-LEVEL SORT, 11) Reducer 13 <- Reducer 12 (GROUP, 7) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 5), Map 7 (PARTITION-LEVEL SORT, 5) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 11), Reducer 2 (PARTITION-LEVEL SORT, 11) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 5), Map 16 (PARTITION-LEVEL SORT, 5) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 11), Reducer 8 (PARTITION-LEVEL SORT, 11) Reducer 4 <- Reducer 3 (GROUP, 7) Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 5), Map 9 (PARTITION-LEVEL SORT, 5) #### A masked pattern was here #### Vertices: - Map 1 + Map 11 Map Operator Tree: TableScan - alias: inventory - filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 10 + Map 14 Map Operator Tree: TableScan alias: inventory @@ -172,7 +171,7 @@ STAGE PLANS: Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 14 + Map 16 Map Operator Tree: TableScan alias: date_dim @@ -192,7 +191,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Map 2 Map Operator Tree: TableScan alias: item @@ -215,60 +214,44 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: inventory + filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int) Reducer 12 Local Work: Map Reduce Local Work @@ -277,22 +260,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col6 input vertices: - 1 Map 16 + 0 Map 10 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: string), _col6 (type: int), _col5 (type: int), _col3 (type: int), UDFToDouble(_col3) (type: double), (UDFToDouble(_col3) * UDFToDouble(_col3)) (type: double) + expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int), _col6 (type: int), UDFToDouble(_col6) (type: double), (UDFToDouble(_col6) * UDFToDouble(_col6)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -336,7 +319,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double), _col3 (type: double) - Reducer 2 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -361,22 +344,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col3, _col4 Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col6 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: string), _col6 (type: int), _col5 (type: int), _col3 (type: int), UDFToDouble(_col3) (type: double), (UDFToDouble(_col3) * UDFToDouble(_col3)) (type: double) + expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int), _col6 (type: int), UDFToDouble(_col6) (type: double), (UDFToDouble(_col6) * UDFToDouble(_col6)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -450,6 +433,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index 1616f29bf2..ab1dc60fee 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -234,70 +234,31 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) - Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) - Reducer 18 <- Reducer 17 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) - Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 (PARTITION-LEVEL SORT, 873) - Reducer 24 <- Reducer 23 (GROUP, 369) - Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 398), Map 31 (PARTITION-LEVEL SORT, 398) - Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 975), Reducer 28 (PARTITION-LEVEL SORT, 975) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 873), Reducer 2 (PARTITION-LEVEL SORT, 873) - Reducer 30 <- Reducer 29 (GROUP, 481) - Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 154), Map 37 (PARTITION-LEVEL SORT, 154) - Reducer 35 <- Map 38 (PARTITION-LEVEL SORT, 706), Reducer 34 (PARTITION-LEVEL SORT, 706) - Reducer 36 <- Reducer 35 (GROUP, 186) - Reducer 4 <- Reducer 3 (GROUP, 369) - Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 690), Reducer 18 (PARTITION-LEVEL SORT, 690), Reducer 24 (PARTITION-LEVEL SORT, 690), Reducer 30 (PARTITION-LEVEL SORT, 690), Reducer 36 (PARTITION-LEVEL SORT, 690), Reducer 4 (PARTITION-LEVEL SORT, 690) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 706), Reducer 14 (PARTITION-LEVEL SORT, 706) + Reducer 11 <- Reducer 10 (GROUP, 186) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 357), Reducer 19 (PARTITION-LEVEL SORT, 357), Reducer 31 (PARTITION-LEVEL SORT, 357) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 873), Reducer 21 (PARTITION-LEVEL SORT, 873) + Reducer 18 <- Reducer 17 (GROUP, 369) + Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 265), Reducer 25 (PARTITION-LEVEL SORT, 265) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Reducer 7 (PARTITION-LEVEL SORT, 975) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 306), Map 22 (PARTITION-LEVEL SORT, 306) + Reducer 24 <- Map 23 (PARTITION-LEVEL SORT, 975), Reducer 27 (PARTITION-LEVEL SORT, 975) + Reducer 25 <- Reducer 24 (GROUP, 481) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 398), Map 28 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 30 <- Map 23 (PARTITION-LEVEL SORT, 873), Reducer 33 (PARTITION-LEVEL SORT, 873) + Reducer 31 <- Reducer 30 (GROUP, 369) + Reducer 33 <- Map 28 (PARTITION-LEVEL SORT, 306), Map 32 (PARTITION-LEVEL SORT, 306) + Reducer 36 <- Map 16 (PARTITION-LEVEL SORT, 706), Reducer 39 (PARTITION-LEVEL SORT, 706) + Reducer 37 <- Reducer 36 (GROUP, 186) + Reducer 39 <- Map 38 (PARTITION-LEVEL SORT, 154), Map 40 (PARTITION-LEVEL SORT, 154) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 718), Reducer 3 (PARTITION-LEVEL SORT, 718), Reducer 37 (PARTITION-LEVEL SORT, 718) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), ((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2) (type: decimal(14,6)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) - Execution mode: vectorized - Map 13 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 14 Map Operator Tree: TableScan alias: customer @@ -318,7 +279,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 15 + Map 13 Map Operator Tree: TableScan alias: web_sales @@ -339,7 +300,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Execution mode: vectorized - Map 19 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -359,7 +320,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 20 + Map 16 Map Operator Tree: TableScan alias: customer @@ -380,7 +341,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 21 + Map 20 Map Operator Tree: TableScan alias: catalog_sales @@ -401,14 +362,14 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Execution mode: vectorized - Map 25 + Map 22 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -421,7 +382,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 26 + Map 23 Map Operator Tree: TableScan alias: customer @@ -442,7 +403,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 27 + Map 26 Map Operator Tree: TableScan alias: store_sales @@ -463,14 +424,14 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Execution mode: vectorized - Map 31 + Map 28 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -486,25 +447,25 @@ STAGE PLANS: Map 32 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), ((((cs_ext_list_price - cs_ext_wholesale_cost) - cs_ext_discount_amt) + cs_ext_sales_price) / 2) (type: decimal(14,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Execution mode: vectorized - Map 33 + Map 38 Map Operator Tree: TableScan alias: web_sales @@ -525,7 +486,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Execution mode: vectorized - Map 37 + Map 40 Map Operator Tree: TableScan alias: date_dim @@ -545,28 +506,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 38 + Map 6 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2) (type: decimal(14,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -586,7 +547,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer @@ -607,27 +568,6 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ((((ss_ext_list_price - ss_ext_wholesale_cost) - ss_ext_discount_amt) + ss_ext_sales_price) / 2) (type: decimal(14,6)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) - Execution mode: vectorized Reducer 10 Reduce Operator Tree: Join Operator @@ -635,41 +575,24 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(14,6)) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) null sort order: zzzzzzz sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) - Reducer 12 + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -677,26 +600,38 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col7 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(24,6)) - Reducer 16 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col8 + Statistics: Num rows: 421645952 Data size: 57099332414 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 421645952 Data size: 57099332414 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)), _col3 (type: decimal(24,6)), _col5 (type: decimal(24,6)), _col7 (type: decimal(24,6)), _col8 (type: boolean) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -719,23 +654,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) null sort order: zzzzzzz sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) Reducer 18 Execution mode: vectorized @@ -745,36 +680,60 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,6)) - Reducer 2 + Reducer 19 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 191657247 Data size: 25954241444 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(14,6)) - Reducer 22 + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 191657247 Data size: 25954241444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)), _col3 (type: decimal(24,6)) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + null sort order: zzzzzzz + sort order: +++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col7 (type: decimal(24,6)) + Reducer 21 Reduce Operator Tree: Join Operator condition map: @@ -791,31 +750,31 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(14,6)) - Reducer 23 + Reducer 24 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) null sort order: zzzzzzz sort order: +++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) - Reducer 24 + Reducer 25 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -823,26 +782,26 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col7 (type: decimal(24,6)) outputColumnNames: _col0, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col7 > 0) (type: boolean) - Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)), _col7 is not null (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(24,6)), _col2 (type: boolean) - Reducer 28 + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)) + Reducer 27 Reduce Operator Tree: Join Operator condition map: @@ -859,43 +818,39 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(14,6)) - Reducer 29 + Reducer 3 + Execution mode: vectorized Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - null sort order: zzzzzzz - sort order: +++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: decimal(24,6)) - Reducer 3 + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: decimal(24,6)) + Reducer 30 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -907,7 +862,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) - Reducer 30 + Reducer 31 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -915,19 +870,26 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: decimal(24,6)) - Reducer 34 + expressions: _col0 (type: string), _col7 (type: decimal(24,6)) + outputColumnNames: _col0, _col7 + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col7 > 0) (type: boolean) + Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col7 (type: decimal(24,6)), _col7 is not null (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 58077952 Data size: 7864921389 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(24,6)), _col2 (type: boolean) + Reducer 33 Reduce Operator Tree: Join Operator condition map: @@ -936,27 +898,27 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(14,6)) - Reducer 35 + Reducer 36 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col10 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string) + aggregations: sum(_col10) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -968,7 +930,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: decimal(24,6)) - Reducer 36 + Reducer 37 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -995,74 +957,82 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,6)), _col2 (type: boolean) - Reducer 4 - Execution mode: vectorized + Reducer 39 Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col7 (type: decimal(24,6)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(24,6)) - Reducer 5 + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(14,6)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 1 to 2 - Inner Join 1 to 3 - Inner Join 1 to 4 - Inner Join 1 to 5 keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col4 (type: string) 2 _col0 (type: string) - 3 _col0 (type: string) - 4 _col0 (type: string) - 5 _col0 (type: string) - outputColumnNames: _col1, _col3, _col5, _col7, _col8, _col10, _col11, _col13, _col14 - Statistics: Num rows: 1916625598 Data size: 169085266687 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col4, _col6, _col8, _col10, _col11, _col13, _col14 + Statistics: Num rows: 927621114 Data size: 125618534033 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (CASE WHEN (_col14) THEN (CASE WHEN (_col8) THEN (((_col1 / _col7) > (_col5 / _col13))) ELSE (false) END) ELSE (false) END and CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col8) THEN (((_col1 / _col7) > (_col11 / _col3))) ELSE (false) END) ELSE (false) END) (type: boolean) - Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE + predicate: (CASE WHEN (_col14) THEN (CASE WHEN (_col11) THEN (((_col6 / _col10) > (_col4 / _col13))) ELSE (false) END) ELSE (false) END and CASE WHEN (_col8 is not null) THEN (CASE WHEN (_col11) THEN (((_col6 / _col10) > (_col2 / _col8))) ELSE (false) END) ELSE (false) END) (type: boolean) + Statistics: Num rows: 231905278 Data size: 31404633440 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col10 (type: string) + expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905278 Data size: 31404633440 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905278 Data size: 31404633440 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 479156399 Data size: 42271316627 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905278 Data size: 31404633440 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(14,6)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query40.q.out b/ql/src/test/results/clientpositive/perf/spark/query40.q.out index 46b2180a76..dbfdbf9c7d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query40.q.out @@ -74,42 +74,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: warehouse + filterExpr: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + expressions: w_warehouse_sk (type: int), w_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 9 Map Operator Tree: TableScan - alias: warehouse - filterExpr: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00') (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: w_warehouse_sk (type: int), w_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int), (CAST( d_date AS DATE) < DATE'1998-04-08') (type: boolean), (CAST( d_date AS DATE) >= DATE'1998-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -118,13 +118,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 329), Map 6 (PARTITION-LEVEL SORT, 329) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 336), Reducer 2 (PARTITION-LEVEL SORT, 336) - Reducer 4 <- Reducer 3 (GROUP, 447) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 329), Map 7 (PARTITION-LEVEL SORT, 329) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 336), Reducer 3 (PARTITION-LEVEL SORT, 336) + Reducer 5 <- Reducer 4 (GROUP, 447) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: catalog_sales @@ -145,7 +145,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: catalog_returns @@ -166,7 +166,7 @@ STAGE PLANS: Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: item @@ -187,7 +187,7 @@ STAGE PLANS: Statistics: Num rows: 51333 Data size: 73728460 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 2 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -204,7 +204,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 3 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -224,20 +224,20 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col4, _col7, _col9, _col11, _col12 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col7, _col9, _col11, _col12, _col14 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col6, _col9, _col11, _col13, _col14 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: string), _col9 (type: string), CASE WHEN (_col11) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(8,2)), CASE WHEN (_col12) THEN ((_col4 - CASE WHEN (_col7 is not null) THEN (_col7) ELSE (0) END)) ELSE (0) END (type: decimal(8,2)) + expressions: _col1 (type: string), _col11 (type: string), CASE WHEN (_col13) THEN ((_col6 - CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END)) ELSE (0) END (type: decimal(8,2)), CASE WHEN (_col14) THEN ((_col6 - CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END)) ELSE (0) END (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -255,7 +255,7 @@ STAGE PLANS: Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(18,2)), _col3 (type: decimal(18,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -271,7 +271,7 @@ STAGE PLANS: Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(18,2)), _col3 (type: decimal(18,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query42.q.out b/ql/src/test/results/clientpositive/perf/spark/query42.q.out index b280ee1534..479cc62255 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query42.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query42.q.out @@ -56,13 +56,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) - Reducer 4 <- Reducer 3 (GROUP, 481) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Reducer 6 (PARTITION-LEVEL SORT, 440) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_category_id (type: int), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: store_sales @@ -83,7 +104,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: dt @@ -103,27 +124,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: item - filterExpr: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_category_id (type: int), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -131,29 +131,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: int), _col6 (type: string) + aggregations: sum(_col5) + keys: _col1 (type: int), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -165,7 +148,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -184,7 +167,7 @@ STAGE PLANS: sort order: -++ Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -201,6 +184,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out index 4e169e6402..3c82897a35 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join JOIN[19][tables = [$hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product -Warning: Shuffle Join JOIN[47][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 13' is a cross product +Warning: Shuffle Join JOIN[25][tables = [$hdt$_2, $hdt$_3]] in Work 'Reducer 8' is a cross product +Warning: Shuffle Join JOIN[53][tables = [$hdt$_3, $hdt$_4]] in Work 'Reducer 15' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -82,49 +82,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 15 (GROUP, 100) - Reducer 12 <- Map 1 (GROUP, 199) - Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 1), Reducer 16 (PARTITION-LEVEL SORT, 1) - Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1009) - Reducer 16 <- Map 15 (GROUP, 100) - Reducer 2 <- Map 1 (GROUP, 199) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 4 (PARTITION-LEVEL SORT, 1009) - Reducer 6 <- Map 17 (PARTITION-LEVEL SORT, 1009), Reducer 5 (PARTITION-LEVEL SORT, 1009) - Reducer 7 <- Map 18 (PARTITION-LEVEL SORT, 1009), Reducer 6 (PARTITION-LEVEL SORT, 1009) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) + Reducer 12 <- Map 11 (GROUP, 100) + Reducer 14 <- Map 13 (GROUP, 199) + Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 18 (PARTITION-LEVEL SORT, 1) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 1009) + Reducer 18 <- Map 11 (GROUP, 100) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1009), Reducer 5 (PARTITION-LEVEL SORT, 1009) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1009), Reducer 10 (PARTITION-LEVEL SORT, 1009) + Reducer 7 <- Map 13 (GROUP, 199) + Reducer 8 <- Reducer 12 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 9 <- Reducer 8 (PARTITION-LEVEL SORT, 1009) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: ss1 - filterExpr: (ss_store_sk = 410) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: i2 + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_store_sk = 410) (type: boolean) - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_item_sk (type: int), ss_net_profit (type: decimal(7,2)) - outputColumnNames: ss_item_sk, ss_net_profit - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(ss_net_profit), count(ss_net_profit) - keys: ss_item_sk (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + expressions: i_item_sk (type: int), i_product_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 15 + Map 11 Map Operator Tree: TableScan alias: store_sales @@ -152,31 +145,38 @@ STAGE PLANS: Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 17 + Map 13 Map Operator Tree: TableScan - alias: i1 - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: ss1 + filterExpr: (ss_store_sk = 410) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_store_sk = 410) (type: boolean) + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_product_name (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + expressions: ss_item_sk (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: ss_item_sk, ss_net_profit + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ss_net_profit), count(ss_net_profit) + keys: ss_item_sk (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 18 + Map 4 Map Operator Tree: TableScan - alias: i2 + alias: i1 filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -195,6 +195,23 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -219,7 +236,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(11,6)) - Reducer 12 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -240,7 +257,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(11,6)) - Reducer 13 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -261,7 +278,7 @@ STAGE PLANS: Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) - Reducer 14 + Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -303,7 +320,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 16 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -329,6 +346,61 @@ STAGE PLANS: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(11,6)) Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col3, _col5 + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: string) + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col4 + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: int) + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -349,7 +421,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: decimal(11,6)) - Reducer 3 + Reducer 8 Reduce Operator Tree: Join Operator condition map: @@ -370,7 +442,7 @@ STAGE PLANS: Statistics: Num rows: 3455947584198744 Data size: 613225345280533248 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) - Reducer 4 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -412,78 +484,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1151982528066248 Data size: 204408448426844416 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1267180808338276 Data size: 224849298143006048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1393898919384048 Data size: 247334233318131680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: string) - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col7 - Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col5 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: string) - Reducer 8 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1533288844555592 Data size: 272067662546852480 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 17700 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out index e5d2676416..e40c75f364 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[67][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[68][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item @@ -103,13 +103,13 @@ STAGE PLANS: Spark Edges: Reducer 11 <- Map 10 (GROUP, 6) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) - Reducer 14 <- Map 16 (PARTITION-LEVEL SORT, 706), Reducer 13 (PARTITION-LEVEL SORT, 706) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 706), Reducer 15 (PARTITION-LEVEL SORT, 706) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 154), Map 16 (PARTITION-LEVEL SORT, 154) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 524), Reducer 9 (PARTITION-LEVEL SORT, 524) Reducer 3 <- Reducer 2 (GROUP, 224) Reducer 4 <- Reducer 3 (SORT, 1) Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 8), Reducer 11 (PARTITION-LEVEL SORT, 8) - Reducer 9 <- Reducer 14 (PARTITION-LEVEL SORT, 191), Reducer 8 (PARTITION-LEVEL SORT, 191) + Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 191), Reducer 8 (PARTITION-LEVEL SORT, 191) #### A masked pattern was here #### Vertices: Map 1 @@ -172,6 +172,27 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 12 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 14 Map Operator Tree: TableScan alias: web_sales @@ -192,7 +213,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: date_dim @@ -212,27 +233,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 16 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -274,6 +274,23 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col3, _col5 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: decimal(7,2)) + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -290,27 +307,6 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col8 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col8 (type: int), _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col1, _col3, _col5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query46.q.out b/ql/src/test/results/clientpositive/perf/spark/query46.q.out index 6de60cf51e..a014639359 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query46.q.out @@ -105,12 +105,12 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 11 + Map 9 Map Operator Tree: TableScan alias: household_demographics @@ -125,8 +125,8 @@ STAGE PLANS: Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -134,57 +134,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) - Reducer 3 <- Map 13 (PARTITION-LEVEL SORT, 637), Reducer 2 (PARTITION-LEVEL SORT, 637) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Reducer 6 (PARTITION-LEVEL SORT, 846) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 637), Reducer 5 (PARTITION-LEVEL SORT, 637) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 846), Reducer 12 (PARTITION-LEVEL SORT, 846) Reducer 8 <- Reducer 7 (GROUP, 582) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_city (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 13 Map Operator Tree: TableScan alias: current_addr @@ -205,7 +163,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 11 Map Operator Tree: TableScan alias: store_sales @@ -226,7 +184,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 9 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -246,38 +204,102 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 + Map 4 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 12 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) - Reducer 3 + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8 + input vertices: + 0 Map 10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col3, _col5, _col7, _col8, _col9 + input vertices: + 0 Map 9 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col7 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col10 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col8, _col9, _col10 Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col10 <> _col6) (type: boolean) + predicate: (_col1 <> _col8) (type: boolean) Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col10 (type: string), _col6 (type: string), _col4 (type: int), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) + expressions: _col5 (type: string), _col4 (type: string), _col1 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col10 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -287,7 +309,7 @@ STAGE PLANS: Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -304,58 +326,36 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Local Work: - Map Reduce Local Work + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 - input vertices: - 1 Map 10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7 - input vertices: - 1 Map 11 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8 + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col12 + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col1, _col5, _col7, _col9, _col10, _col11 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col6), sum(_col7) - keys: _col1 (type: int), _col12 (type: string), _col3 (type: int), _col5 (type: int) + aggregations: sum(_col10), sum(_col11) + keys: _col5 (type: int), _col1 (type: string), _col7 (type: int), _col9 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index 2aa0a17c43..026b53a211 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -120,7 +120,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 1 Map Operator Tree: TableScan alias: store @@ -135,8 +135,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col5 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -145,7 +145,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 19 + Map 11 Map Operator Tree: TableScan alias: store @@ -160,8 +160,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col5 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -170,7 +170,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 27 + Map 20 Map Operator Tree: TableScan alias: store @@ -185,8 +185,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col5 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -194,66 +194,25 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 17 (PARTITION-LEVEL SORT, 398) - Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 442), Reducer 12 (PARTITION-LEVEL SORT, 442) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 442), Reducer 18 (PARTITION-LEVEL SORT, 442) Reducer 14 <- Reducer 13 (GROUP, 529) Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 265) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 265) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) - Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) - Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 442), Reducer 21 (PARTITION-LEVEL SORT, 442) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 287), Reducer 25 (PARTITION-LEVEL SORT, 287) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 398), Map 19 (PARTITION-LEVEL SORT, 398) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 442), Reducer 27 (PARTITION-LEVEL SORT, 442) Reducer 23 <- Reducer 22 (GROUP, 529) Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 265) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 25 <- Reducer 24 (PARTITION-LEVEL SORT, 265) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 398), Map 28 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 442), Reducer 9 (PARTITION-LEVEL SORT, 442) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) - Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 551), Reducer 24 (PARTITION-LEVEL SORT, 551), Reducer 5 (PARTITION-LEVEL SORT, 551) + Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 555), Reducer 5 (PARTITION-LEVEL SORT, 555) Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized - Map 17 + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -274,7 +233,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 18 + Map 12 Map Operator Tree: TableScan alias: item @@ -295,7 +254,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 20 + Map 17 Map Operator Tree: TableScan alias: store_sales @@ -316,7 +275,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 25 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -337,7 +296,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 26 + Map 2 Map Operator Tree: TableScan alias: item @@ -358,7 +317,49 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 8 + Map 21 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 26 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 28 Map Operator Tree: TableScan alias: date_dim @@ -379,28 +380,139 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan - alias: item - filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Reducer 13 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5, _col6, _col8, _col9 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col11, _col12 + input vertices: + 0 Map 11 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col9) + keys: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col11 (type: int), _col12 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + null sort order: zzzzzz + sort order: ++++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(17,2)) + Reducer 14 Execution mode: vectorized - Reducer 12 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) + null sort order: aaaazz + sort order: ++++++ + Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(17,2)) + Reducer 15 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col4 ASC NULLS LAST, _col5 ASC NULLS LAST + partition by: _col3, _col2, _col0, _col1 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col4, _col5 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: rank_window_0 is not null (type: boolean) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)) + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int) + outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col14 (type: int) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col14 (type: int) + Statistics: Num rows: 421657640 Data size: 37198759428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(17,2)), _col10 (type: int), _col11 (type: int), _col12 (type: decimal(17,2)), _col13 (type: decimal(21,6)) + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -417,7 +529,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) - Reducer 13 + Reducer 22 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -425,23 +537,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5, _col6, _col8, _col9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11, _col12 + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col11, _col12 input vertices: - 1 Map 19 + 0 Map 20 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + aggregations: sum(_col9) + keys: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col11 (type: int), _col12 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -453,7 +565,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) - Reducer 14 + Reducer 23 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -469,7 +581,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col6 (type: decimal(17,2)) - Reducer 15 + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -507,7 +619,7 @@ STAGE PLANS: Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2)) - Reducer 16 + Reducer 25 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -556,24 +668,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int) Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col7 (type: decimal(21,6)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) - Reducer 21 + Reducer 27 Reduce Operator Tree: Join Operator condition map: @@ -590,100 +685,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) - Reducer 22 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11, _col12 - input vertices: - 1 Map 27 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) - null sort order: zzzzzz - sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(17,2)) - Reducer 23 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._col5 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: int), _col5 (type: int) - null sort order: aaaazz - sort order: ++++++ - Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(17,2)) - Reducer 24 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: string, _col4: int, _col5: int, _col6: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col4 ASC NULLS LAST, _col5 ASC NULLS LAST - partition by: _col3, _col2, _col0, _col1 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col4, _col5 - name: rank - window function: GenericUDAFRankEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: rank_window_0 is not null (type: boolean) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) - null sort order: zzzzz - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(17,2)) Reducer 3 Local Work: Map Reduce Local Work @@ -692,23 +693,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col8, _col9 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5, _col6, _col8, _col9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11, _col12 + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col11, _col12 input vertices: - 1 Map 10 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + aggregations: sum(_col9) + keys: _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col11 (type: int), _col12 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -768,7 +769,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) + expressions: _col3 (type: string), _col2 (type: string), _col0 (type: string), _col1 (type: string), _col6 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -783,22 +784,20 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) - 1 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col8 (type: int) - 2 _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int) - outputColumnNames: _col4, _col6, _col10, _col11, _col12, _col13, _col19 - Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE + 1 _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col14 (type: int) + outputColumnNames: _col4, _col10, _col12, _col16, _col17, _col18, _col19 + Statistics: Num rows: 463823414 Data size: 40918636257 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: string), _col10 (type: int), _col11 (type: int), _col13 (type: decimal(21,6)), _col12 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col19 (type: decimal(17,2)), (_col12 - _col13) (type: decimal(22,6)) + expressions: _col12 (type: string), _col16 (type: int), _col17 (type: int), _col19 (type: decimal(21,6)), _col18 (type: decimal(17,2)), _col10 (type: decimal(17,2)), _col4 (type: decimal(17,2)), (_col18 - _col19) (type: decimal(22,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636257 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: decimal(22,6)), _col2 (type: int) null sort order: zz sort order: ++ - Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636257 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: int), _col3 (type: decimal(21,6)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)) Reducer 7 @@ -807,7 +806,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col2 (type: decimal(21,6)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 463823414 Data size: 40918636257 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -818,6 +817,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out index e903c4355b..284adeec31 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out @@ -150,7 +150,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -165,8 +165,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -174,13 +174,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 134), Map 6 (PARTITION-LEVEL SORT, 134) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 146), Reducer 2 (PARTITION-LEVEL SORT, 146) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 146), Reducer 7 (PARTITION-LEVEL SORT, 146) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 134), Map 8 (PARTITION-LEVEL SORT, 134) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: store_sales @@ -201,7 +221,7 @@ STAGE PLANS: Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized - Map 6 + Map 8 Map Operator Tree: TableScan alias: customer_demographics @@ -221,27 +241,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -262,23 +262,6 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Reducer 3 Reduce Operator Tree: Join Operator @@ -287,15 +270,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col3 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) + value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Reducer 4 Local Work: Map Reduce Local Work @@ -304,34 +287,38 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col11, _col12, _col13 + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col13 Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col11 and _col5) or (_col12 and _col6) or (_col13 and _col7)) (type: boolean) + predicate: ((_col11 and _col6) or (_col12 and _col7) or (_col13 and _col8)) (type: boolean) Statistics: Num rows: 191662557 Data size: 16908526469 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4 - input vertices: - 1 Map 9 - Statistics: Num rows: 210828817 Data size: 18599379519 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: _col4 (type: int), _col5 (type: int) + outputColumnNames: _col4, _col5 + Statistics: Num rows: 191662557 Data size: 16908526469 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 210828817 Data size: 18599379519 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col6) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -347,6 +334,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index f7f0d446d5..f800b59c6c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -274,48 +274,48 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 10 (PARTITION-LEVEL SORT, 12), Map 12 (PARTITION-LEVEL SORT, 12) - Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 21), Reducer 13 (PARTITION-LEVEL SORT, 21) - Reducer 15 <- Reducer 14 (GROUP, 14) + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 6), Map 9 (PARTITION-LEVEL SORT, 6) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 21), Reducer 18 (PARTITION-LEVEL SORT, 21) + Reducer 14 <- Reducer 13 (GROUP, 14) + Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 7) Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 7) - Reducer 17 <- Reducer 16 (PARTITION-LEVEL SORT, 7) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 10 (PARTITION-LEVEL SORT, 6) - Reducer 21 <- Map 10 (PARTITION-LEVEL SORT, 15), Map 20 (PARTITION-LEVEL SORT, 15) - Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 28), Reducer 21 (PARTITION-LEVEL SORT, 28) - Reducer 23 <- Reducer 22 (GROUP, 18) + Reducer 18 <- Map 11 (PARTITION-LEVEL SORT, 12), Map 17 (PARTITION-LEVEL SORT, 12) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 10), Reducer 10 (PARTITION-LEVEL SORT, 10) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 28), Reducer 26 (PARTITION-LEVEL SORT, 28) + Reducer 22 <- Reducer 21 (GROUP, 18) + Reducer 23 <- Reducer 22 (PARTITION-LEVEL SORT, 9) Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 9) - Reducer 25 <- Reducer 24 (PARTITION-LEVEL SORT, 9) - Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 10), Reducer 2 (PARTITION-LEVEL SORT, 10) - Reducer 4 <- Reducer 3 (GROUP, 7) + Reducer 26 <- Map 11 (PARTITION-LEVEL SORT, 15), Map 25 (PARTITION-LEVEL SORT, 15) + Reducer 3 <- Reducer 2 (GROUP, 7) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 4) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 4) - Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 4) - Reducer 7 <- Reducer 17 (GROUP, 7), Reducer 6 (GROUP, 7) - Reducer 8 <- Reducer 25 (GROUP, 10), Reducer 7 (GROUP, 10) - Reducer 9 <- Reducer 8 (SORT, 1) + Reducer 6 <- Reducer 16 (GROUP, 7), Reducer 5 (GROUP, 7) + Reducer 7 <- Reducer 24 (GROUP, 10), Reducer 6 (GROUP, 10) + Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: ws - filterExpr: ((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: wr + filterExpr: ((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE + predicate: ((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) (type: boolean) + Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), CASE WHEN (ws_quantity is not null) THEN (ws_quantity) ELSE (0) END (type: int), CASE WHEN (ws_net_paid is not null) THEN (ws_net_paid) ELSE (0) END (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE + expressions: wr_item_sk (type: int), wr_order_number (type: int), CASE WHEN (wr_return_quantity is not null) THEN (wr_return_quantity) ELSE (0) END (type: int), CASE WHEN (wr_return_amt is not null) THEN (wr_return_amt) ELSE (0) END (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 10 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -335,28 +335,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan - alias: wr - filterExpr: ((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: cr + filterExpr: ((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) (type: boolean) - Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE + predicate: ((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) (type: boolean) + Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_item_sk (type: int), wr_order_number (type: int), CASE WHEN (wr_return_quantity is not null) THEN (wr_return_quantity) ELSE (0) END (type: int), CASE WHEN (wr_return_amt is not null) THEN (wr_return_amt) ELSE (0) END (type: decimal(7,2)) + expressions: cr_item_sk (type: int), cr_order_number (type: int), CASE WHEN (cr_return_quantity is not null) THEN (cr_return_quantity) ELSE (0) END (type: int), CASE WHEN (cr_return_amount is not null) THEN (cr_return_amount) ELSE (0) END (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 17 Map Operator Tree: TableScan alias: cs @@ -377,28 +377,28 @@ STAGE PLANS: Statistics: Num rows: 10666290 Data size: 1444429931 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 19 + Map 20 Map Operator Tree: TableScan - alias: cr - filterExpr: ((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: sr + filterExpr: ((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) (type: boolean) - Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE + predicate: ((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int), CASE WHEN (cr_return_quantity is not null) THEN (cr_return_quantity) ELSE (0) END (type: int), CASE WHEN (cr_return_amount is not null) THEN (cr_return_amount) ELSE (0) END (type: decimal(7,2)) + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), CASE WHEN (sr_return_quantity is not null) THEN (sr_return_quantity) ELSE (0) END (type: int), CASE WHEN (sr_return_amt is not null) THEN (sr_return_amt) ELSE (0) END (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 9599627 Data size: 1019078226 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 20 + Map 25 Map Operator Tree: TableScan alias: sts @@ -419,28 +419,28 @@ STAGE PLANS: Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 27 + Map 9 Map Operator Tree: TableScan - alias: sr - filterExpr: ((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: ws + filterExpr: ((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE + predicate: ((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int), CASE WHEN (sr_return_quantity is not null) THEN (sr_return_quantity) ELSE (0) END (type: int), CASE WHEN (sr_return_amt is not null) THEN (sr_return_amt) ELSE (0) END (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), CASE WHEN (ws_quantity is not null) THEN (ws_quantity) ELSE (0) END (type: int), CASE WHEN (ws_net_paid is not null) THEN (ws_net_paid) ELSE (0) END (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 19197050 Data size: 1487398277 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Reducer 13 + Reducer 10 Reduce Operator Tree: Join Operator condition map: @@ -449,27 +449,27 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 11732919 Data size: 1588872958 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5866775 Data size: 797711773 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 11732919 Data size: 1588872958 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5866775 Data size: 797711773 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 14 + Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col3, _col4, _col8, _col9 + 0 _col0 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col2, _col3, _col5, _col7, _col8 Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col8), sum(_col3), sum(_col9), sum(_col4) - keys: _col1 (type: int) + aggregations: sum(_col2), sum(_col7), sum(_col3), sum(_col8) + keys: _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -481,7 +481,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12906211 Data size: 1747760291 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 15 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -497,7 +497,7 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 16 + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -536,7 +536,7 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 6453105 Data size: 873880077 Basic stats: COMPLETE Column stats: NONE value expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 17 + Reducer 16 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -582,7 +582,7 @@ STAGE PLANS: sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20)) Statistics: Num rows: 6453220 Data size: 875080950 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -591,44 +591,51 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 5866775 Data size: 797711773 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11732919 Data size: 1588872958 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 5866775 Data size: 797711773 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11732919 Data size: 1588872958 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 21 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 23466488 Data size: 2070220435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 23466488 Data size: 2070220435 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) - Reducer 22 + 0 _col0 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col2, _col3, _col5, _col7, _col8 + Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), sum(_col7), sum(_col3), sum(_col8) + keys: _col5 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) + Reducer 21 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col3, _col4, _col8, _col9 + 0 _col0 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col2, _col3, _col5, _col7, _col8 Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col8), sum(_col3), sum(_col9), sum(_col4) - keys: _col1 (type: int) + aggregations: sum(_col2), sum(_col7), sum(_col3), sum(_col8) + keys: _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -640,7 +647,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25813137 Data size: 2277242527 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 23 + Reducer 22 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -656,7 +663,7 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 24 + Reducer 23 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -695,7 +702,7 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 12906568 Data size: 1138621219 Basic stats: COMPLETE Column stats: NONE value expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 25 + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -742,31 +749,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20)) Statistics: Num rows: 11830988 Data size: 1196621228 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 3 + Reducer 26 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col2 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col1, _col3, _col4, _col8, _col9 - Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col8), sum(_col3), sum(_col9), sum(_col4) - keys: _col1 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6453452 Data size: 877482969 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 4 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 23466488 Data size: 2070220435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 23466488 Data size: 2070220435 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -782,7 +782,7 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -821,7 +821,7 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 3226726 Data size: 438741484 Basic stats: COMPLETE Column stats: NONE value expressions: rank_window_0 (type: int), _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -867,7 +867,7 @@ STAGE PLANS: sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20)) Statistics: Num rows: 6453220 Data size: 875080950 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -892,7 +892,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(35,20)) Statistics: Num rows: 11830988 Data size: 1196621228 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 8 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -911,7 +911,7 @@ STAGE PLANS: Statistics: Num rows: 5915494 Data size: 598310614 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: decimal(35,20)) - Reducer 9 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query5.q.out b/ql/src/test/results/clientpositive/perf/spark/query5.q.out index 7b64092295..e884fa2e9f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query5.q.out @@ -285,7 +285,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan alias: store @@ -310,7 +310,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 23 + Map 16 Map Operator Tree: TableScan alias: web_site @@ -334,40 +334,40 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 14 (PARTITION-LEVEL SORT, 329), Map 9 (PARTITION-LEVEL SORT, 329) - Reducer 11 <- Map 15 (PARTITION-LEVEL SORT, 362), Reducer 10 (PARTITION-LEVEL SORT, 362) - Reducer 12 <- Reducer 11 (GROUP, 398) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 322), Map 22 (PARTITION-LEVEL SORT, 322), Reducer 20 (PARTITION-LEVEL SORT, 322) - Reducer 18 <- Reducer 17 (GROUP, 389) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 14 (PARTITION-LEVEL SORT, 432), Map 6 (PARTITION-LEVEL SORT, 432) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 164), Map 21 (PARTITION-LEVEL SORT, 164) - Reducer 3 <- Reducer 2 (GROUP, 523) - Reducer 4 <- Reducer 12 (GROUP, 1009), Reducer 18 (GROUP, 1009), Reducer 3 (GROUP, 1009) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 362), Reducer 13 (PARTITION-LEVEL SORT, 362) + Reducer 11 <- Reducer 10 (GROUP, 398) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 329), Map 14 (PARTITION-LEVEL SORT, 329), Map 15 (PARTITION-LEVEL SORT, 329) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 322), Map 23 (PARTITION-LEVEL SORT, 322), Reducer 21 (PARTITION-LEVEL SORT, 322) + Reducer 19 <- Reducer 18 (GROUP, 389) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 164), Map 22 (PARTITION-LEVEL SORT, 164) + Reducer 3 <- Map 15 (PARTITION-LEVEL SORT, 432), Map 2 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) + Reducer 4 <- Reducer 3 (GROUP, 523) + Reducer 5 <- Reducer 11 (GROUP, 1009), Reducer 19 (GROUP, 1009), Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 12 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_sold_date_sk is not null and cs_catalog_page_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_sold_date_sk is not null and cs_catalog_page_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_store_sk (type: int), ss_sold_date_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) + expressions: cs_catalog_page_sk (type: int), cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: catalog_returns @@ -388,7 +388,7 @@ STAGE PLANS: Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -408,28 +408,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: catalog_page - filterExpr: cp_catalog_page_sk is not null (type: boolean) - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cp_catalog_page_sk is not null (type: boolean) - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 16 + Map 17 Map Operator Tree: TableScan alias: web_sales @@ -450,7 +429,28 @@ STAGE PLANS: Statistics: Num rows: 302405606 Data size: 41118416712 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 19 + Map 2 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_store_sk (type: int), ss_sold_date_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633586785 Data size: 55276696920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Execution mode: vectorized + Map 20 Map Operator Tree: TableScan alias: web_sales @@ -471,7 +471,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 21 + Map 22 Map Operator Tree: TableScan alias: web_returns @@ -492,7 +492,7 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 22 + Map 23 Map Operator Tree: TableScan alias: date_dim @@ -512,7 +512,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: store_returns @@ -536,42 +536,25 @@ STAGE PLANS: Map 9 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_sold_date_sk is not null and cs_catalog_page_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: catalog_page + filterExpr: cp_catalog_page_sk is not null (type: boolean) + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_sold_date_sk is not null and cs_catalog_page_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: cp_catalog_page_sk is not null (type: boolean) + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_catalog_page_sk (type: int), cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788717 Data size: 42056843632 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 - Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) - Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -579,11 +562,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col8 + outputColumnNames: _col1, _col4, _col5, _col6, _col7 Statistics: Num rows: 383314363 Data size: 50888782999 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col8 (type: string) + aggregations: sum(_col4), sum(_col6), sum(_col5), sum(_col7) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -595,7 +578,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 383314363 Data size: 50888782999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 12 + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -623,7 +606,24 @@ STAGE PLANS: Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 17 + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 348467596 Data size: 46262528997 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Reducer 18 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -641,13 +641,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col8 + outputColumnNames: _col1, _col4, _col5, _col6, _col7 input vertices: - 1 Map 23 + 0 Map 16 Statistics: Num rows: 365910798 Data size: 49753286377 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col8 (type: string) + aggregations: sum(_col4), sum(_col6), sum(_col5), sum(_col7) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -659,7 +659,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 365910798 Data size: 49753286377 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 18 + Reducer 19 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -687,7 +687,25 @@ STAGE PLANS: Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 2 + Reducer 21 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col3, _col6, _col7 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col3 (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 302405606 Data size: 41118416712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -705,13 +723,13 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col8 + outputColumnNames: _col1, _col4, _col5, _col6, _col7 input vertices: - 1 Map 8 + 0 Map 1 Statistics: Num rows: 766640042 Data size: 66884806171 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col8 (type: string) + aggregations: sum(_col4), sum(_col6), sum(_col5), sum(_col7) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -723,25 +741,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 766640042 Data size: 66884806171 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) - Reducer 20 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - outputColumnNames: _col1, _col3, _col6, _col7 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int), 0 (type: decimal(7,2)), 0 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 302405606 Data size: 41118416712 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -769,7 +769,7 @@ STAGE PLANS: Statistics: Num rows: 2273797803 Data size: 251290313118 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -790,7 +790,7 @@ STAGE PLANS: Statistics: Num rows: 1136898901 Data size: 125645156503 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query50.q.out b/ql/src/test/results/clientpositive/perf/spark/query50.q.out index 33da100cd8..eadcf705b6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query50.q.out @@ -134,7 +134,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan alias: store @@ -158,14 +158,54 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 481), Reducer 2 (PARTITION-LEVEL SORT, 481) - Reducer 4 <- Reducer 3 (GROUP, 529) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 36), Map 9 (PARTITION-LEVEL SORT, 36) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 481), Reducer 6 (PARTITION-LEVEL SORT, 481) + Reducer 3 <- Reducer 2 (GROUP, 529) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 476), Reducer 9 (PARTITION-LEVEL SORT, 476) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 36), Map 8 (PARTITION-LEVEL SORT, 36) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 10 + Map Operator Tree: + TableScan + alias: d2 + filterExpr: ((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: store_sales @@ -186,7 +226,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 input vertices: - 1 Map 6 + 1 Map 7 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int) @@ -198,27 +238,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 10 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: store_returns @@ -239,44 +259,7 @@ STAGE PLANS: Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: d2 - filterExpr: ((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) - 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: int) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -284,10 +267,10 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + outputColumnNames: _col1, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), CASE WHEN (((_col16 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 30) and ((_col16 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 60) and ((_col16 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col16 - _col0) > 90) and ((_col16 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col16 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + expressions: _col7 (type: string), _col8 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), CASE WHEN (((_col17 - _col1) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col17 - _col1) > 30) and ((_col17 - _col1) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col17 - _col1) > 60) and ((_col17 - _col1) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col17 - _col1) > 90) and ((_col17 - _col1) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col17 - _col1) > 120)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -305,7 +288,7 @@ STAGE PLANS: Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -321,7 +304,7 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col10 (type: bigint), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint), _col14 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -338,7 +321,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col2 (type: int), _col4 (type: int) + 1 _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: int) + Reducer 9 Reduce Operator Tree: Join Operator condition map: diff --git a/ql/src/test/results/clientpositive/perf/spark/query52.q.out b/ql/src/test/results/clientpositive/perf/spark/query52.q.out index c0d3015d2a..22c0dfa302 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query52.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query52.q.out @@ -56,13 +56,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) - Reducer 4 <- Reducer 3 (GROUP, 481) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Reducer 6 (PARTITION-LEVEL SORT, 440) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: store_sales @@ -83,7 +104,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: dt @@ -103,27 +124,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: item - filterExpr: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((i_manager_id = 1) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -131,29 +131,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: int), _col6 (type: string) + aggregations: sum(_col5) + keys: _col1 (type: int), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -165,7 +148,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -181,7 +164,7 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -202,6 +185,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query53.q.out b/ql/src/test/results/clientpositive/perf/spark/query53.q.out index 35f8a95397..b34569524a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query53.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -87,8 +87,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -96,14 +96,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 438), Reducer 8 (PARTITION-LEVEL SORT, 438) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 403), Map 9 (PARTITION-LEVEL SORT, 403) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_qoy (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -124,7 +145,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: item @@ -145,44 +166,6 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_qoy (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int) Reducer 3 Local Work: Map Reduce Local Work @@ -193,21 +176,21 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col7 + outputColumnNames: _col1, _col4, _col5, _col7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col7 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col2, _col6, _col8 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col7 (type: int), _col5 (type: int) + aggregations: sum(_col6) + keys: _col2 (type: int), _col8 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -300,6 +283,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index e82801c07b..753c004ed4 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[108][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -161,11 +161,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 654), Reducer 16 (PARTITION-LEVEL SORT, 654) - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 459), Map 17 (PARTITION-LEVEL SORT, 459), Map 18 (PARTITION-LEVEL SORT, 459) - Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 504), Reducer 13 (PARTITION-LEVEL SORT, 504) - Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009) - Reducer 16 <- Reducer 15 (GROUP, 610) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 654), Reducer 14 (PARTITION-LEVEL SORT, 654) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 1009), Reducer 17 (PARTITION-LEVEL SORT, 1009) + Reducer 14 <- Reducer 13 (GROUP, 610) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 459), Map 18 (PARTITION-LEVEL SORT, 459), Map 19 (PARTITION-LEVEL SORT, 459) + Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 504), Reducer 16 (PARTITION-LEVEL SORT, 504) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) Reducer 22 <- Map 21 (GROUP, 2) Reducer 24 <- Map 23 (GROUP, 2) @@ -202,6 +202,27 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized Map 12 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: catalog_sales @@ -222,7 +243,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized - Map 17 + Map 18 Map Operator Tree: TableScan alias: web_sales @@ -243,7 +264,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized - Map 18 + Map 19 Map Operator Tree: TableScan alias: item @@ -263,7 +284,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 19 + Map 20 Map Operator Tree: TableScan alias: date_dim @@ -283,27 +304,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 20 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized Map 21 Map Operator Tree: TableScan @@ -478,50 +478,17 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE Reducer 13 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE - Reducer 15 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: int), _col5 (type: int) + keys: _col1 (type: int), _col0 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -532,7 +499,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -551,6 +518,39 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query55.q.out b/ql/src/test/results/clientpositive/perf/spark/query55.q.out index 9a4559dfcd..0131a68f54 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query55.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query55.q.out @@ -40,13 +40,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 440), Reducer 2 (PARTITION-LEVEL SORT, 440) - Reducer 4 <- Reducer 3 (GROUP, 481) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 440), Reducer 6 (PARTITION-LEVEL SORT, 440) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string) + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: store_sales @@ -67,7 +88,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -87,27 +108,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: item - filterExpr: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -115,29 +115,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: int), _col6 (type: string) + aggregations: sum(_col5) + keys: _col1 (type: int), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -149,7 +132,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -169,7 +152,7 @@ STAGE PLANS: Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -186,6 +169,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query56.q.out b/ql/src/test/results/clientpositive/perf/spark/query56.q.out index 29274a9c76..c77c934a2b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query56.q.out @@ -154,21 +154,21 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596) + Reducer 10 <- Map 20 (PARTITION-LEVEL SORT, 596), Reducer 12 (PARTITION-LEVEL SORT, 596) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 8), Reducer 19 (PARTITION-LEVEL SORT, 8) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 21 (PARTITION-LEVEL SORT, 375) Reducer 17 <- Reducer 16 (GROUP, 406) Reducer 19 <- Map 18 (GROUP, 6) - Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305) - Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 494), Reducer 23 (PARTITION-LEVEL SORT, 494) + Reducer 23 <- Map 13 (PARTITION-LEVEL SORT, 305), Map 22 (PARTITION-LEVEL SORT, 305) Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 8), Reducer 30 (PARTITION-LEVEL SORT, 8) - Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191) + Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 32 (PARTITION-LEVEL SORT, 191) Reducer 28 <- Reducer 27 (GROUP, 204) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487) Reducer 30 <- Map 18 (GROUP, 6) - Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154) - Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327) + Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 327), Reducer 34 (PARTITION-LEVEL SORT, 327) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 154), Map 35 (PARTITION-LEVEL SORT, 154) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 17 (GROUP, 569), Reducer 28 (GROUP, 569), Reducer 4 (GROUP, 569) Reducer 6 <- Reducer 5 (SORT, 1) @@ -195,45 +195,46 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 12 + Map 11 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 13 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 18 Map Operator Tree: @@ -262,6 +263,26 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 20 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 22 Map Operator Tree: TableScan alias: catalog_sales @@ -304,6 +325,26 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized Map 31 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 33 Map Operator Tree: TableScan alias: web_sales @@ -324,7 +365,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 34 + Map 35 Map Operator Tree: TableScan alias: date_dim @@ -344,47 +385,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 35 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ca_gmt_offset = -8) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized Reducer 10 Reduce Operator Tree: Join Operator @@ -392,37 +392,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 11 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 12 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 15 Reduce Operator Tree: Join Operator @@ -502,6 +498,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reducer 21 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -518,27 +531,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - Reducer 22 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col3, _col4 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) Reducer 26 Reduce Operator Tree: Join Operator @@ -648,37 +640,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 33 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 34 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index a6e27d53ef..53aed739f7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -114,7 +114,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 1 Map Operator Tree: TableScan alias: call_center @@ -129,8 +129,8 @@ STAGE PLANS: Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -139,7 +139,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 19 + Map 11 Map Operator Tree: TableScan alias: call_center @@ -154,8 +154,8 @@ STAGE PLANS: Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -164,7 +164,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 27 + Map 20 Map Operator Tree: TableScan alias: call_center @@ -179,8 +179,8 @@ STAGE PLANS: Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -188,66 +188,25 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 17 (PARTITION-LEVEL SORT, 306) - Reducer 13 <- Map 18 (PARTITION-LEVEL SORT, 341), Reducer 12 (PARTITION-LEVEL SORT, 341) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 341), Reducer 18 (PARTITION-LEVEL SORT, 341) Reducer 14 <- Reducer 13 (GROUP, 406) Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 203) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 203) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) - Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) - Reducer 22 <- Map 26 (PARTITION-LEVEL SORT, 341), Reducer 21 (PARTITION-LEVEL SORT, 341) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 220), Reducer 25 (PARTITION-LEVEL SORT, 220) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 341), Reducer 27 (PARTITION-LEVEL SORT, 341) Reducer 23 <- Reducer 22 (GROUP, 406) Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 203) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 341), Reducer 2 (PARTITION-LEVEL SORT, 341) + Reducer 25 <- Reducer 24 (PARTITION-LEVEL SORT, 203) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 306), Map 28 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 341), Reducer 9 (PARTITION-LEVEL SORT, 341) Reducer 4 <- Reducer 3 (GROUP, 406) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 203) - Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 423), Reducer 24 (PARTITION-LEVEL SORT, 423), Reducer 5 (PARTITION-LEVEL SORT, 423) + Reducer 6 <- Reducer 16 (PARTITION-LEVEL SORT, 426), Reducer 5 (PARTITION-LEVEL SORT, 426) Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized - Map 17 + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -268,7 +227,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 18 + Map 12 Map Operator Tree: TableScan alias: item @@ -289,7 +248,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 20 + Map 17 Map Operator Tree: TableScan alias: catalog_sales @@ -310,7 +269,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 25 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -331,7 +290,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 26 + Map 2 Map Operator Tree: TableScan alias: item @@ -352,7 +311,49 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 8 + Map 21 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 26 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 28 Map Operator Tree: TableScan alias: date_dim @@ -373,28 +374,139 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan - alias: item - filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_call_center_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_call_center_sk (type: int), cs_item_sk (type: int), cs_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Reducer 13 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6, _col8, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col3, _col4, _col8, _col10, _col11 + input vertices: + 0 Map 11 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col8) + keys: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col10 (type: int), _col11 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reducer 14 Execution mode: vectorized - Reducer 12 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int), _col4 (type: int) + null sort order: aaazz + sort order: +++++ + Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reducer 15 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST + partition by: _col2, _col1, _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col3, _col4 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: rank_window_0 is not null (type: boolean) + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) + null sort order: zzzz + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) + Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)) + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string), _col7 (type: int), _col2 (type: string) + outputColumnNames: _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 210822976 Data size: 28549666132 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string), _col6 (type: string), _col12 (type: int), _col7 (type: string) + null sort order: zzzz + sort order: ++++ + Map-reduce partition columns: _col5 (type: string), _col6 (type: string), _col12 (type: int), _col7 (type: string) + Statistics: Num rows: 210822976 Data size: 28549666132 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(17,2)), _col8 (type: int), _col9 (type: int), _col10 (type: decimal(17,2)), _col11 (type: decimal(21,6)) + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -411,7 +523,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) - Reducer 13 + Reducer 22 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -419,23 +531,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6, _col8, _col9 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col3, _col4, _col8, _col10, _col11 input vertices: - 1 Map 19 + 0 Map 20 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + aggregations: sum(_col8) + keys: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col10 (type: int), _col11 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -447,7 +559,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) - Reducer 14 + Reducer 23 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -463,7 +575,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: int), _col5 (type: decimal(17,2)) - Reducer 15 + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -501,7 +613,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2)) - Reducer 16 + Reducer 25 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -550,24 +662,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col7 (type: int), _col2 (type: string) Statistics: Num rows: 15971437 Data size: 2162853414 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: decimal(17,2)), _col6 (type: decimal(21,6)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) - Reducer 21 + Reducer 27 Reduce Operator Tree: Join Operator condition map: @@ -584,100 +679,6 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) - Reducer 22 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11 - input vertices: - 1 Map 27 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) - null sort order: zzzzz - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) - Reducer 23 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col3 (type: int), _col4 (type: int) - null sort order: aaazz - sort order: +++++ - Map-reduce partition columns: _col2 (type: string), _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) - Reducer 24 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: decimal(17,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: int, _col5: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col3 ASC NULLS LAST, _col4 ASC NULLS LAST - partition by: _col2, _col1, _col0 - raw input shape: - window functions: - window function definition - alias: rank_window_0 - arguments: _col3, _col4 - name: rank - window function: GenericUDAFRankEvaluator - window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) - isPivotResult: true - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: rank_window_0 is not null (type: boolean) - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) - null sort order: zzzz - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) - Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(17,2)) Reducer 3 Local Work: Map Reduce Local Work @@ -686,23 +687,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col8, _col9 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col6, _col8, _col9 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col11 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col3, _col4, _col8, _col10, _col11 input vertices: - 1 Map 10 + 0 Map 1 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col8 (type: string), _col9 (type: string), _col5 (type: int), _col6 (type: int) + aggregations: sum(_col8) + keys: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col10 (type: int), _col11 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -762,7 +763,7 @@ STAGE PLANS: predicate: rank_window_0 is not null (type: boolean) Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 + 1) (type: int) + expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col5 (type: decimal(17,2)), (rank_window_0 - 1) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -777,22 +778,20 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) - 1 _col0 (type: string), _col1 (type: string), _col7 (type: int), _col2 (type: string) - 2 _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: string) - outputColumnNames: _col3, _col5, _col6, _col8, _col9, _col10, _col11, _col16 - Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE + 1 _col5 (type: string), _col6 (type: string), _col12 (type: int), _col7 (type: string) + outputColumnNames: _col3, _col8, _col10, _col11, _col13, _col14, _col15, _col16 + Statistics: Num rows: 231905278 Data size: 31404633425 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: string), _col6 (type: string), _col8 (type: int), _col9 (type: int), _col11 (type: decimal(21,6)), _col10 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col16 (type: decimal(17,2)), (_col10 - _col11) (type: decimal(22,6)) + expressions: _col10 (type: string), _col11 (type: string), _col13 (type: int), _col14 (type: int), _col16 (type: decimal(21,6)), _col15 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col3 (type: decimal(17,2)), (_col15 - _col16) (type: decimal(22,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905278 Data size: 31404633425 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col8 (type: decimal(22,6)), _col2 (type: int) null sort order: zz sort order: ++ - Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905278 Data size: 31404633425 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: decimal(21,6)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) Reducer 7 @@ -801,7 +800,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: decimal(21,6)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2)), VALUE._col6 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 421645952 Data size: 57099332264 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905278 Data size: 31404633425 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -812,6 +811,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query58.q.out b/ql/src/test/results/clientpositive/perf/spark/query58.q.out index ac03820049..c624d7a699 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query58.q.out @@ -1,6 +1,6 @@ -Warning: Map Join MAPJOIN[180][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[181][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[182][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[192][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[193][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[194][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -152,10 +152,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 1) + Reducer 15 <- Map 14 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -178,7 +178,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 12 + Reducer 15 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -201,10 +201,10 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 25 <- Map 24 (GROUP, 1) + Reducer 29 <- Map 28 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 24 + Map 28 Map Operator Tree: TableScan alias: date_dim @@ -227,7 +227,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 25 + Reducer 29 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -250,10 +250,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 38 <- Map 37 (GROUP, 1) + Reducer 42 <- Map 41 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 37 + Map 41 Map Operator Tree: TableScan alias: date_dim @@ -276,7 +276,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized - Reducer 38 + Reducer 42 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -299,50 +299,51 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 9 (GROUP, 2) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 2), Reducer 23 (PARTITION-LEVEL SORT, 2) - Reducer 18 <- Map 27 (PARTITION-LEVEL SORT, 398), Reducer 17 (PARTITION-LEVEL SORT, 398) - Reducer 19 <- Map 28 (PARTITION-LEVEL SORT, 442), Reducer 18 (PARTITION-LEVEL SORT, 442) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) - Reducer 20 <- Reducer 19 (GROUP, 481) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 2), Map 26 (PARTITION-LEVEL SORT, 2) - Reducer 23 <- Reducer 22 (GROUP, 2) - Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 306), Reducer 2 (PARTITION-LEVEL SORT, 306) - Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 2), Reducer 36 (PARTITION-LEVEL SORT, 2) - Reducer 31 <- Map 40 (PARTITION-LEVEL SORT, 154), Reducer 30 (PARTITION-LEVEL SORT, 154) - Reducer 32 <- Map 41 (PARTITION-LEVEL SORT, 174), Reducer 31 (PARTITION-LEVEL SORT, 174) - Reducer 33 <- Reducer 32 (GROUP, 186) - Reducer 35 <- Map 34 (PARTITION-LEVEL SORT, 2), Map 39 (PARTITION-LEVEL SORT, 2) - Reducer 36 <- Reducer 35 (GROUP, 2) - Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 341), Reducer 3 (PARTITION-LEVEL SORT, 341) - Reducer 5 <- Reducer 4 (GROUP, 369) - Reducer 6 <- Reducer 20 (PARTITION-LEVEL SORT, 518), Reducer 33 (PARTITION-LEVEL SORT, 518), Reducer 5 (PARTITION-LEVEL SORT, 518) - Reducer 7 <- Reducer 6 (SORT, 1) - Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Reducer 11 (GROUP, 2) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 341), Reducer 21 (PARTITION-LEVEL SORT, 341) + Reducer 18 <- Reducer 17 (GROUP, 369) + Reducer 19 <- Reducer 18 (PARTITION-LEVEL SORT, 425), Reducer 32 (PARTITION-LEVEL SORT, 425) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 174), Reducer 7 (PARTITION-LEVEL SORT, 174) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 306), Reducer 23 (PARTITION-LEVEL SORT, 306) + Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 2), Reducer 26 (PARTITION-LEVEL SORT, 2) + Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 2), Map 27 (PARTITION-LEVEL SORT, 2) + Reducer 26 <- Reducer 25 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 186) + Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 442), Reducer 34 (PARTITION-LEVEL SORT, 442) + Reducer 32 <- Reducer 31 (GROUP, 481) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 398), Reducer 36 (PARTITION-LEVEL SORT, 398) + Reducer 36 <- Map 35 (PARTITION-LEVEL SORT, 2), Reducer 39 (PARTITION-LEVEL SORT, 2) + Reducer 38 <- Map 37 (PARTITION-LEVEL SORT, 2), Map 40 (PARTITION-LEVEL SORT, 2) + Reducer 39 <- Reducer 38 (GROUP, 2) + Reducer 4 <- Reducer 19 (PARTITION-LEVEL SORT, 96), Reducer 3 (PARTITION-LEVEL SORT, 96) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 154), Reducer 9 (PARTITION-LEVEL SORT, 154) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_date_sk is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 13 + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -363,28 +364,39 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 14 + Map 13 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_week_seq is not null and (d_date = '1998-02-19')) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (d_week_seq is not null and (d_date = '1998-02-19')) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + input vertices: + 1 Reducer 15 + Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 15 + Local Work: + Map Reduce Local Work + Map 16 Map Operator Tree: TableScan alias: item @@ -405,7 +417,28 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 16 + Map 20 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 22 Map Operator Tree: TableScan alias: date_dim @@ -426,7 +459,28 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 21 + Map 24 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date (type: string), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 27 Map Operator Tree: TableScan alias: date_dim @@ -447,7 +501,7 @@ STAGE PLANS: 1 outputColumnNames: _col0 input vertices: - 1 Reducer 25 + 1 Reducer 29 Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -458,28 +512,28 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 26 + Map 30 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date (type: string), d_week_seq (type: int) + expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 27 + Map 33 Map Operator Tree: TableScan alias: store_sales @@ -500,49 +554,49 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 28 + Map 35 Map Operator Tree: TableScan - alias: item - filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized - Map 29 + Map 37 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date_sk is not null and d_date is not null) (type: boolean) + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_date is not null) (type: boolean) + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: d_date (type: string), d_week_seq (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) Execution mode: vectorized - Map 34 + Map 40 Map Operator Tree: TableScan alias: date_dim @@ -563,7 +617,7 @@ STAGE PLANS: 1 outputColumnNames: _col0 input vertices: - 1 Reducer 38 + 1 Reducer 42 Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -574,28 +628,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 39 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date (type: string), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Execution mode: vectorized - Map 40 + Map 6 Map Operator Tree: TableScan alias: web_sales @@ -616,147 +649,88 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 41 + Map 8 Map Operator Tree: TableScan - alias: item - filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_week_seq is not null and (d_date = '1998-02-19')) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_week_seq is not null and (d_date = '1998-02-19')) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Reducer 12 - Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 41199072 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 10 + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reducer 17 + Reducer 11 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col1 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 18 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 12 + Execution mode: vectorized Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) - Reducer 19 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reducer 17 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 20 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -764,19 +738,39 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (0.9 * _col1) (type: decimal(19,3)), (1.1 * _col1) (type: decimal(20,3)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) - Reducer 22 + Reducer 19 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7 + Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col2 AND _col3) (type: boolean) + Statistics: Num rows: 4732408 Data size: 417494408 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4732408 Data size: 417494408 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)), _col5 (type: decimal(17,2)), _col6 (type: decimal(19,3)), _col7 (type: decimal(20,3)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -784,35 +778,23 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - outputColumnNames: _col2 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col4 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col2 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 23 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 21 Reduce Operator Tree: Join Operator condition map: @@ -820,16 +802,16 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 + outputColumnNames: _col1, _col2 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col4 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) - Reducer 30 + value expressions: _col2 (type: decimal(7,2)) + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -845,48 +827,43 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 31 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)) - Reducer 32 + Reducer 25 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + keys: _col0 (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 33 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 26 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -906,7 +883,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) - Reducer 35 + Reducer 31 Reduce Operator Tree: Join Operator condition map: @@ -914,143 +891,185 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - outputColumnNames: _col2 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col4 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col2 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 36 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)) + Reducer 32 Execution mode: vectorized Reduce Operator Tree: Group By Operator + aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (0.9 * _col1) (type: decimal(19,3)), (1.1 * _col1) (type: decimal(20,3)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) + Reducer 34 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reducer 36 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 38 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + keys: _col0 (type: string) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) - Reducer 5 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 39 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (0.9 * _col1) (type: decimal(19,3)), (1.1 * _col1) (type: decimal(20,3)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) - Reducer 6 + outputColumnNames: _col0 + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col9, _col10, _col11 - Statistics: Num rows: 766650239 Data size: 67634106674 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 + Statistics: Num rows: 95833780 Data size: 13030622681 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col9 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col6 AND _col7 and _col1 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col10 AND _col11 and _col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col2 AND _col3) (type: boolean) - Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 BETWEEN _col6 AND _col7 and _col1 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col2 AND _col3) (type: boolean) + Statistics: Num rows: 14606 Data size: 1985993 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col5 (type: decimal(17,2)), (((_col5 / ((_col5 + _col1) + _col9)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col5 + _col1) + _col9)) / 3) * 100) (type: decimal(38,17)), _col9 (type: decimal(17,2)), (((_col9 / ((_col5 + _col1) + _col9)) / 3) * 100) (type: decimal(38,17)), (((_col5 + _col1) + _col9) / 3) (type: decimal(23,6)) + expressions: _col4 (type: string), _col9 (type: decimal(17,2)), (((_col9 / ((_col9 + _col5) + _col1)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col9 + _col5) + _col1)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col9 + _col5) + _col1)) / 3) * 100) (type: decimal(38,17)), (((_col9 + _col5) + _col1) / 3) (type: decimal(23,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14606 Data size: 1985993 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) null sort order: zz sort order: ++ - Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14606 Data size: 1985993 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 7 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14606 Data size: 1985993 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col2 + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query59.q.out b/ql/src/test/results/clientpositive/perf/spark/query59.q.out index 6621893191..ee2b6888f5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query59.q.out @@ -103,7 +103,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -118,8 +118,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -128,7 +128,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 10 Map Operator Tree: TableScan alias: store @@ -143,8 +143,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -152,38 +152,17 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) - Reducer 12 <- Reducer 11 (GROUP, 437) - Reducer 13 <- Map 15 (PARTITION-LEVEL SORT, 219), Reducer 12 (PARTITION-LEVEL SORT, 219) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 2 (GROUP, 437) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 219), Reducer 3 (PARTITION-LEVEL SORT, 219) - Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 529), Reducer 4 (PARTITION-LEVEL SORT, 529) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 15 (PARTITION-LEVEL SORT, 398) + Reducer 13 <- Reducer 12 (GROUP, 437) + Reducer 14 <- Map 16 (PARTITION-LEVEL SORT, 219), Reducer 13 (PARTITION-LEVEL SORT, 219) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) + Reducer 4 <- Reducer 3 (GROUP, 437) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 219), Reducer 4 (PARTITION-LEVEL SORT, 219) + Reducer 6 <- Reducer 14 (PARTITION-LEVEL SORT, 529), Reducer 5 (PARTITION-LEVEL SORT, 529) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 + Map 11 Map Operator Tree: TableScan alias: store_sales @@ -204,7 +183,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -225,7 +204,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized - Map 15 + Map 16 Map Operator Tree: TableScan alias: d @@ -245,7 +224,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 2 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -266,7 +266,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: d @@ -286,7 +286,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 11 + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -314,7 +314,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 12 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -330,7 +330,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 13 + Reducer 14 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -346,14 +346,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col10 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9 input vertices: - 1 Map 16 + 0 Map 10 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col10 (type: string), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) + expressions: _col2 (type: int), _col1 (type: string), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -363,7 +363,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), (_col0 - 52) (type: int) Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 2 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -391,7 +391,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -407,7 +407,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -423,31 +423,31 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col11 (type: string), _col0 (type: int) + key expressions: _col1 (type: string), _col3 (type: int) null sort order: zz sort order: ++ - Map-reduce partition columns: _col11 (type: string), _col0 (type: int) + Map-reduce partition columns: _col1 (type: string), _col3 (type: int) Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col12 (type: string) - Reducer 5 + value expressions: _col2 (type: string), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)), _col10 (type: decimal(17,2)), _col11 (type: decimal(17,2)) + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col11 (type: string), _col0 (type: int) + 0 _col1 (type: string), _col3 (type: int) 1 _col1 (type: string), (_col0 - 52) (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col15, _col16, _col17, _col18, _col19, _col20 + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col12 (type: string), _col11 (type: string), _col0 (type: int), (_col2 / _col15) (type: decimal(37,20)), (_col3 / _col16) (type: decimal(37,20)), (_col4 / _col4) (type: decimal(37,20)), (_col5 / _col17) (type: decimal(37,20)), (_col6 / _col18) (type: decimal(37,20)), (_col7 / _col19) (type: decimal(37,20)), (_col8 / _col20) (type: decimal(37,20)) + expressions: _col2 (type: string), _col1 (type: string), _col3 (type: int), (_col5 / _col15) (type: decimal(37,20)), (_col6 / _col16) (type: decimal(37,20)), (_col7 / _col7) (type: decimal(37,20)), (_col8 / _col17) (type: decimal(37,20)), (_col9 / _col18) (type: decimal(37,20)), (_col10 / _col19) (type: decimal(37,20)), (_col11 / _col20) (type: decimal(37,20)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -457,7 +457,7 @@ STAGE PLANS: Statistics: Num rows: 421657640 Data size: 37198759476 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(37,20)), _col4 (type: decimal(37,20)), _col5 (type: decimal(37,20)), _col6 (type: decimal(37,20)), _col7 (type: decimal(37,20)), _col8 (type: decimal(37,20)), _col9 (type: decimal(37,20)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query6.q.out b/ql/src/test/results/clientpositive/perf/spark/query6.q.out index 4c6d99e1c5..fbed06e650 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[86][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[88][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a @@ -144,16 +144,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 439), Reducer 9 (PARTITION-LEVEL SORT, 439) - Reducer 11 <- Map 20 (PARTITION-LEVEL SORT, 1009), Reducer 10 (PARTITION-LEVEL SORT, 1009) - Reducer 13 <- Map 12 (GROUP, 6) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 2), Reducer 18 (PARTITION-LEVEL SORT, 2) - Reducer 16 <- Map 19 (PARTITION-LEVEL SORT, 398), Reducer 15 (PARTITION-LEVEL SORT, 398) - Reducer 18 <- Map 17 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 849), Reducer 11 (PARTITION-LEVEL SORT, 849) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 8), Reducer 14 (PARTITION-LEVEL SORT, 8) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 439), Reducer 16 (PARTITION-LEVEL SORT, 439) + Reducer 14 <- Map 13 (GROUP, 6) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 398), Reducer 18 (PARTITION-LEVEL SORT, 398) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 2), Reducer 20 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 849), Reducer 9 (PARTITION-LEVEL SORT, 849) + Reducer 20 <- Map 19 (GROUP, 2) Reducer 3 <- Reducer 2 (GROUP, 582) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 8), Reducer 13 (PARTITION-LEVEL SORT, 8) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 1009), Reducer 12 (PARTITION-LEVEL SORT, 1009) #### A masked pattern was here #### Vertices: Map 1 @@ -189,7 +189,28 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 12 + Map 10 + Map Operator Tree: + TableScan + alias: i + filterExpr: (i_item_sk is not null and i_current_price is not null and i_category is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_current_price is not null and i_category is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(7,2)) + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: j @@ -213,7 +234,28 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized - Map 14 + Map 15 + Map Operator Tree: + TableScan + alias: s + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 17 Map Operator Tree: TableScan alias: d @@ -234,7 +276,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 17 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -260,28 +302,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 19 - Map Operator Tree: - TableScan - alias: s - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Execution mode: vectorized - Map 20 + Map 8 Map Operator Tree: TableScan alias: c @@ -302,28 +323,26 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: i - filterExpr: (i_item_sk is not null and i_current_price is not null and i_category is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (i_item_sk is not null and i_current_price is not null and i_category is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_current_price (type: decimal(7,2)), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: decimal(7,2)) - Execution mode: vectorized - Reducer 10 + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > _col4) (type: boolean) + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -339,27 +358,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col7 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col7 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col12 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -382,57 +381,39 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(19,7)) - Reducer 15 + Reducer 16 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 18 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: int), _col5 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) - Reducer 18 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -457,6 +438,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 20 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: @@ -499,19 +494,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 508200 Data size: 729916518 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > _col4) (type: boolean) - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 169400 Data size: 243305506 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col7 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query60.q.out b/ql/src/test/results/clientpositive/perf/spark/query60.q.out index 7eecf02008..6b47ff4c67 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query60.q.out @@ -174,21 +174,21 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Map 13 (PARTITION-LEVEL SORT, 596), Reducer 10 (PARTITION-LEVEL SORT, 596) + Reducer 10 <- Map 20 (PARTITION-LEVEL SORT, 596), Reducer 12 (PARTITION-LEVEL SORT, 596) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) Reducer 15 <- Map 1 (PARTITION-LEVEL SORT, 7), Reducer 19 (PARTITION-LEVEL SORT, 7) - Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 22 (PARTITION-LEVEL SORT, 375) + Reducer 16 <- Reducer 15 (PARTITION-LEVEL SORT, 375), Reducer 21 (PARTITION-LEVEL SORT, 375) Reducer 17 <- Reducer 16 (GROUP, 406) Reducer 19 <- Map 18 (GROUP, 3) - Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 305), Map 20 (PARTITION-LEVEL SORT, 305) - Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 494), Reducer 21 (PARTITION-LEVEL SORT, 494) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 494), Reducer 23 (PARTITION-LEVEL SORT, 494) + Reducer 23 <- Map 13 (PARTITION-LEVEL SORT, 305), Map 22 (PARTITION-LEVEL SORT, 305) Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 7), Reducer 30 (PARTITION-LEVEL SORT, 7) - Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 33 (PARTITION-LEVEL SORT, 191) + Reducer 27 <- Reducer 26 (PARTITION-LEVEL SORT, 191), Reducer 32 (PARTITION-LEVEL SORT, 191) Reducer 28 <- Reducer 27 (GROUP, 204) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 487), Reducer 15 (PARTITION-LEVEL SORT, 487) Reducer 30 <- Map 18 (GROUP, 3) - Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 154), Map 34 (PARTITION-LEVEL SORT, 154) - Reducer 33 <- Map 35 (PARTITION-LEVEL SORT, 327), Reducer 32 (PARTITION-LEVEL SORT, 327) + Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 327), Reducer 34 (PARTITION-LEVEL SORT, 327) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 154), Map 35 (PARTITION-LEVEL SORT, 154) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 17 (GROUP, 569), Reducer 28 (GROUP, 569), Reducer 4 (GROUP, 569) Reducer 6 <- Reducer 5 (SORT, 1) @@ -215,45 +215,46 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 12 + Map 11 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 13 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 18 Map Operator Tree: @@ -282,6 +283,26 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 20 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 22 Map Operator Tree: TableScan alias: catalog_sales @@ -324,6 +345,26 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: vectorized Map 31 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 33 Map Operator Tree: TableScan alias: web_sales @@ -344,7 +385,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 34 + Map 35 Map Operator Tree: TableScan alias: date_dim @@ -364,47 +405,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 35 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ca_gmt_offset = -6) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_addr_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized Reducer 10 Reduce Operator Tree: Join Operator @@ -412,37 +412,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 11 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 12 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 15 Reduce Operator Tree: Join Operator @@ -523,6 +519,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Reducer 21 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -539,27 +552,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - Reducer 22 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col3, _col4 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) Reducer 26 Reduce Operator Tree: Join Operator @@ -670,37 +662,33 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 33 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: decimal(7,2)) + Reducer 34 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 4 Execution mode: vectorized Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query61.q.out b/ql/src/test/results/clientpositive/perf/spark/query61.q.out index f94233cd46..d88c2c6f17 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query61.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query61.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[102][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[107][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 from @@ -113,7 +113,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 23 + Map 18 Map Operator Tree: TableScan alias: store @@ -128,8 +128,8 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -140,8 +140,8 @@ STAGE PLANS: Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 697), Map 17 (PARTITION-LEVEL SORT, 697) Reducer 15 <- Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 20 (PARTITION-LEVEL SORT, 1009) Reducer 16 <- Reducer 15 (GROUP, 1) - Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 398), Map 21 (PARTITION-LEVEL SORT, 398) - Reducer 20 <- Map 22 (PARTITION-LEVEL SORT, 440), Reducer 19 (PARTITION-LEVEL SORT, 440) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 440), Reducer 22 (PARTITION-LEVEL SORT, 440) + Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 23 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 13 @@ -185,7 +185,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 18 + Map 19 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 21 Map Operator Tree: TableScan alias: store_sales @@ -206,7 +226,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 21 + Map 23 Map Operator Tree: TableScan alias: date_dim @@ -226,26 +246,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 22 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 14 Reduce Operator Tree: Join Operator @@ -297,23 +297,6 @@ STAGE PLANS: keys: 0 1 - Reducer 19 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 20 Local Work: Map Reduce Local Work @@ -322,73 +305,86 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4, _col5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col4, _col6 input vertices: - 1 Map 23 + 0 Map 18 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col4 (type: decimal(7,2)) - outputColumnNames: _col4, _col6 + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(7,2)) + value expressions: _col6 (type: decimal(7,2)) + Reducer 22 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 11 + Map 6 Map Operator Tree: TableScan - alias: promotion - filterExpr: (p_promo_sk is not null and ((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y'))) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_promo_sk is not null and ((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y'))) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_promo_sk (type: int) + expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col5 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 12 + Map 7 Map Operator Tree: TableScan - alias: store - filterExpr: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: promotion + filterExpr: (p_promo_sk is not null and ((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y'))) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((s_gmt_offset = -7) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + predicate: (p_promo_sk is not null and ((p_channel_dmail = 'Y') or (p_channel_email = 'Y') or (p_channel_tv = 'Y'))) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) + expressions: p_promo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col5 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -396,11 +392,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 12 (PARTITION-LEVEL SORT, 398) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 8 (PARTITION-LEVEL SORT, 1009) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1009), Reducer 9 (PARTITION-LEVEL SORT, 1009) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 440), Reducer 7 (PARTITION-LEVEL SORT, 440) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 440), Reducer 11 (PARTITION-LEVEL SORT, 440) #### A masked pattern was here #### Vertices: Map 1 @@ -427,84 +423,101 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: item - filterExpr: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_store_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_customer_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_store_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_customer_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 5 + Map 12 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_store_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_customer_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_store_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_customer_sk is not null and ss_item_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + predicate: (i_item_sk is not null and (i_category = 'Electronics')) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: i_item_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -573,24 +586,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) - Reducer 8 + Reducer 9 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -598,41 +594,37 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col4, _col5, _col7 input vertices: - 1 Map 11 + 0 Map 7 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col5, _col8 input vertices: - 1 Map 12 + 0 Map 6 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col5 (type: decimal(7,2)) - outputColumnNames: _col5, _col8 + Reduce Output Operator + key expressions: _col5 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - value expressions: _col8 (type: decimal(7,2)) + value expressions: _col8 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query63.q.out b/ql/src/test/results/clientpositive/perf/spark/query63.q.out index 8b9bcf17b6..b597d20830 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -74,7 +74,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -89,8 +89,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -98,14 +98,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 438), Reducer 8 (PARTITION-LEVEL SORT, 438) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 403), Map 9 (PARTITION-LEVEL SORT, 403) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_moy (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -126,7 +147,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: item @@ -147,44 +168,6 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_moy (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int) Reducer 3 Local Work: Map Reduce Local Work @@ -195,21 +178,21 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col7 + outputColumnNames: _col1, _col4, _col5, _col7 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col7 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col2, _col6, _col8 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col7 (type: int), _col5 (type: int) + aggregations: sum(_col6) + keys: _col2 (type: int), _col8 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -302,6 +285,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out index b3b2e6635a..9597d762dc 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out @@ -74,7 +74,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 1 Map Operator Tree: TableScan alias: store @@ -90,7 +90,7 @@ STAGE PLANS: Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col5 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -98,23 +98,23 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 437) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 2 (GROUP, 437) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 328), Reducer 3 (PARTITION-LEVEL SORT, 328) - Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 86), Reducer 4 (PARTITION-LEVEL SORT, 86) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) + Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 437) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 86), Reducer 8 (PARTITION-LEVEL SORT, 86) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 7 <- Reducer 6 (GROUP, 437) + Reducer 8 <- Reducer 12 (PARTITION-LEVEL SORT, 328), Reducer 7 (PARTITION-LEVEL SORT, 328) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) @@ -128,7 +128,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 11 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -148,7 +148,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 12 + Map 2 Map Operator Tree: TableScan alias: item @@ -169,48 +169,72 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: string) Execution mode: vectorized - Map 7 + Map 5 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col2 (type: int), _col1 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -243,70 +267,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(23,7)) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col2 (type: int), _col1 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(17,2)) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 <= _col4) (type: boolean) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -314,22 +275,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col6, _col7, _col8, _col9 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 Statistics: Num rows: 127775039 Data size: 11272351047 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col11 + 1 _col5 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col9 input vertices: - 1 Map 13 + 0 Map 1 Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col11 (type: string), _col6 (type: string), _col2 (type: decimal(17,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: string) + expressions: _col1 (type: string), _col3 (type: string), _col9 (type: decimal(17,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -339,7 +300,7 @@ STAGE PLANS: Statistics: Num rows: 140552545 Data size: 12399586420 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -356,7 +317,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -377,9 +338,52 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) + Reducer 7 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(17,2)) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 <= _col4) (type: boolean) + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out index 72f6b1cc09..46a46eb986 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out @@ -467,7 +467,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: warehouse @@ -482,8 +482,8 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col17 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -492,42 +492,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan - alias: ship_mode - filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: time_dim + filterExpr: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sm_ship_mode_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 7 + Map 9 Map Operator Tree: TableScan - alias: time_dim - filterExpr: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: ship_mode + filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE + predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: sm_ship_mode_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -537,7 +537,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 10 Map Operator Tree: TableScan alias: warehouse @@ -552,8 +552,8 @@ STAGE PLANS: Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col17 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -562,42 +562,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 14 Map Operator Tree: TableScan - alias: ship_mode - filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: time_dim + filterExpr: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sm_ship_mode_sk (type: int) + expressions: t_time_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 14 + Map 16 Map Operator Tree: TableScan - alias: time_dim - filterExpr: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: ship_mode + filterExpr: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE + predicate: ((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: t_time_sk (type: int) + expressions: sm_ship_mode_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -606,27 +606,48 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 369), Map 15 (PARTITION-LEVEL SORT, 369) - Reducer 12 <- Reducer 11 (GROUP, 447) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 186), Map 15 (PARTITION-LEVEL SORT, 186) - Reducer 3 <- Reducer 2 (GROUP, 224) - Reducer 4 <- Reducer 12 (GROUP, 336), Reducer 3 (GROUP, 336) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 369), Map 15 (PARTITION-LEVEL SORT, 369) + Reducer 13 <- Reducer 12 (GROUP, 447) + Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 186), Map 8 (PARTITION-LEVEL SORT, 186) + Reducer 4 <- Reducer 3 (GROUP, 224) + Reducer 5 <- Reducer 13 (GROUP, 336), Reducer 4 (GROUP, 336) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 11 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), (ws_sales_price * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)), (ws_net_paid_inc_tax * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)) + expressions: d_date_sk (type: int), (d_moy = 1) (type: boolean), (d_moy = 2) (type: boolean), (d_moy = 3) (type: boolean), (d_moy = 4) (type: boolean), (d_moy = 5) (type: boolean), (d_moy = 6) (type: boolean), (d_moy = 7) (type: boolean), (d_moy = 8) (type: boolean), (d_moy = 9) (type: boolean), (d_moy = 10) (type: boolean), (d_moy = 11) (type: boolean), (d_moy = 12) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean) + Execution mode: vectorized + Map 15 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: (cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), (cs_ext_sales_price * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)), (cs_net_paid_inc_ship_tax * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -635,41 +656,41 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col5 input vertices: - 1 Map 6 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: PARTIAL Column stats: NONE + 1 Map 16 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6 input vertices: - 1 Map 7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: PARTIAL Column stats: NONE + 0 Map 14 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(18,2)), _col6 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 10 + Map 8 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_ship_mode_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), (cs_ext_sales_price * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)), (cs_net_paid_inc_ship_tax * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)) + expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), (ws_sales_price * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)), (ws_net_paid_inc_tax * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -678,50 +699,29 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col5 input vertices: - 1 Map 13 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: PARTIAL Column stats: NONE + 1 Map 9 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3, _col4, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6 input vertices: - 1 Map 14 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE + 0 Map 7 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: PARTIAL Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: decimal(18,2)), _col6 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 15 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), (d_moy = 1) (type: boolean), (d_moy = 2) (type: boolean), (d_moy = 3) (type: boolean), (d_moy = 4) (type: boolean), (d_moy = 5) (type: boolean), (d_moy = 6) (type: boolean), (d_moy = 7) (type: boolean), (d_moy = 8) (type: boolean), (d_moy = 9) (type: boolean), (d_moy = 10) (type: boolean), (d_moy = 11) (type: boolean), (d_moy = 12) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean) - Execution mode: vectorized - Reducer 11 + Reducer 12 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -730,21 +730,21 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col17, _col18, _col19 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col22, _col23, _col24, _col25, _col26, _col27 + 0 _col0 (type: int) + 1 _col17 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col25, _col26 input vertices: - 1 Map 16 + 0 Map 10 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: string), CASE WHEN (_col9) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col5) ELSE (0) END (type: decimal(18,2)) + expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), CASE WHEN (_col8) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col8) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col26) ELSE (0) END (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE Group By Operator @@ -761,7 +761,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) - Reducer 12 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -789,7 +789,7 @@ STAGE PLANS: Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2)) - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -798,21 +798,21 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col17, _col18, _col19 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col22, _col23, _col24, _col25, _col26, _col27 + 0 _col0 (type: int) + 1 _col17 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col25, _col26 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: string), _col26 (type: string), _col27 (type: string), CASE WHEN (_col9) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col4) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col5) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col20) THEN (_col5) ELSE (0) END (type: decimal(18,2)) + expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), CASE WHEN (_col8) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col25) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col8) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col9) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col10) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col11) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col12) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col13) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col14) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col15) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col16) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col17) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col18) THEN (_col26) ELSE (0) END (type: decimal(18,2)), CASE WHEN (_col19) THEN (_col26) ELSE (0) END (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE Group By Operator @@ -829,7 +829,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: PARTIAL Column stats: NONE value expressions: _col6 (type: decimal(28,2)), _col7 (type: decimal(28,2)), _col8 (type: decimal(28,2)), _col9 (type: decimal(28,2)), _col10 (type: decimal(28,2)), _col11 (type: decimal(28,2)), _col12 (type: decimal(28,2)), _col13 (type: decimal(28,2)), _col14 (type: decimal(28,2)), _col15 (type: decimal(28,2)), _col16 (type: decimal(28,2)), _col17 (type: decimal(28,2)), _col18 (type: decimal(28,2)), _col19 (type: decimal(28,2)), _col20 (type: decimal(28,2)), _col21 (type: decimal(28,2)), _col22 (type: decimal(28,2)), _col23 (type: decimal(28,2)), _col24 (type: decimal(28,2)), _col25 (type: decimal(28,2)), _col26 (type: decimal(28,2)), _col27 (type: decimal(28,2)), _col28 (type: decimal(28,2)), _col29 (type: decimal(28,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -857,7 +857,7 @@ STAGE PLANS: Statistics: Num rows: 316240137 Data size: 42883351482 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -873,7 +873,7 @@ STAGE PLANS: Statistics: Num rows: 158120068 Data size: 21441675673 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(38,2)), _col7 (type: decimal(38,2)), _col8 (type: decimal(38,2)), _col9 (type: decimal(38,2)), _col10 (type: decimal(38,2)), _col11 (type: decimal(38,2)), _col12 (type: decimal(38,2)), _col13 (type: decimal(38,2)), _col14 (type: decimal(38,2)), _col15 (type: decimal(38,2)), _col16 (type: decimal(38,2)), _col17 (type: decimal(38,2)), _col18 (type: decimal(38,12)), _col19 (type: decimal(38,12)), _col20 (type: decimal(38,12)), _col21 (type: decimal(38,12)), _col22 (type: decimal(38,12)), _col23 (type: decimal(38,12)), _col24 (type: decimal(38,12)), _col25 (type: decimal(38,12)), _col26 (type: decimal(38,12)), _col27 (type: decimal(38,12)), _col28 (type: decimal(38,12)), _col29 (type: decimal(38,12)), _col30 (type: decimal(38,2)), _col31 (type: decimal(38,2)), _col32 (type: decimal(38,2)), _col33 (type: decimal(38,2)), _col34 (type: decimal(38,2)), _col35 (type: decimal(38,2)), _col36 (type: decimal(38,2)), _col37 (type: decimal(38,2)), _col38 (type: decimal(38,2)), _col39 (type: decimal(38,2)), _col40 (type: decimal(38,2)), _col41 (type: decimal(38,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query67.q.out b/ql/src/test/results/clientpositive/perf/spark/query67.q.out index 024983dd04..40a97965cb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query67.q.out @@ -104,7 +104,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -119,8 +119,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col7 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -128,14 +128,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 442), Reducer 8 (PARTITION-LEVEL SORT, 442) Reducer 4 <- Reducer 3 (GROUP, 1009) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 1009) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -156,7 +177,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(18,2)) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -177,44 +198,6 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string), i_product_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(18,2)), _col5 (type: int), _col6 (type: int), _col7 (type: int) Reducer 3 Local Work: Map Reduce Local Work @@ -223,23 +206,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col9, _col10, _col11, _col12 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col10, _col11, _col12 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col14 + 0 _col0 (type: int) + 1 _col7 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col10, _col12, _col13, _col14 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col14 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int), 0L (type: bigint) + aggregations: sum(_col10) + keys: _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col12 (type: int), _col13 (type: int), _col14 (type: int), 0L (type: bigint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -331,6 +314,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(18,2)), _col5 (type: int), _col6 (type: int), _col7 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query68.q.out b/ql/src/test/results/clientpositive/perf/spark/query68.q.out index 12f8ce06ed..6e4bcae0fb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query68.q.out @@ -119,12 +119,12 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 11 + Map 9 Map Operator Tree: TableScan alias: household_demographics @@ -139,8 +139,8 @@ STAGE PLANS: Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -148,57 +148,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) - Reducer 3 <- Map 13 (PARTITION-LEVEL SORT, 637), Reducer 2 (PARTITION-LEVEL SORT, 637) - Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 7 <- Map 12 (PARTITION-LEVEL SORT, 846), Reducer 6 (PARTITION-LEVEL SORT, 846) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 637), Reducer 5 (PARTITION-LEVEL SORT, 637) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 829), Reducer 8 (PARTITION-LEVEL SORT, 829) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 846), Reducer 12 (PARTITION-LEVEL SORT, 846) Reducer 8 <- Reducer 7 (GROUP, 582) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) - Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ca_address_sk is not null (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_city (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 13 Map Operator Tree: TableScan alias: current_addr @@ -219,7 +177,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 11 Map Operator Tree: TableScan alias: store_sales @@ -240,7 +198,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) Execution mode: vectorized - Map 9 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -260,38 +218,102 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 + Map 4 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_current_addr_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ca_address_sk is not null (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 12 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) - Reducer 3 + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9 + input vertices: + 0 Map 10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col3, _col5, _col7, _col8, _col9, _col10 + input vertices: + 0 Map 9 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col7 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col11 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5, _col6, _col8, _col9, _col10, _col11 Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col11 <> _col6) (type: boolean) + predicate: (_col1 <> _col8) (type: boolean) Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col11 (type: string), _col6 (type: string), _col4 (type: int), _col7 (type: decimal(17,2)), _col9 (type: decimal(17,2)), _col8 (type: decimal(17,2)) + expressions: _col5 (type: string), _col4 (type: string), _col1 (type: string), _col8 (type: string), _col6 (type: int), _col9 (type: decimal(17,2)), _col11 (type: decimal(17,2)), _col10 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -301,7 +323,7 @@ STAGE PLANS: Statistics: Num rows: 510205766 Data size: 45010500864 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -318,58 +340,36 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Local Work: - Map Reduce Local Work + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col8 - input vertices: - 1 Map 10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8 - input vertices: - 1 Map 11 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 463823414 Data size: 40918636263 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: int), _col6 (type: string), _col7 (type: decimal(17,2)), _col8 (type: decimal(17,2)), _col9 (type: decimal(17,2)) Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col13 + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col1, _col5, _col7, _col9, _col10, _col11, _col12 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col6), sum(_col7), sum(_col8) - keys: _col1 (type: int), _col13 (type: string), _col3 (type: int), _col5 (type: int) + aggregations: sum(_col10), sum(_col11), sum(_col12) + keys: _col5 (type: int), _col1 (type: string), _col7 (type: int), _col9 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 diff --git a/ql/src/test/results/clientpositive/perf/spark/query7.q.out b/ql/src/test/results/clientpositive/perf/spark/query7.q.out index e5e643b09b..fa2d088d1b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query7.q.out @@ -60,7 +60,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 5 Map Operator Tree: TableScan alias: promotion @@ -75,8 +75,8 @@ STAGE PLANS: Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -84,35 +84,14 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 534), Reducer 3 (PARTITION-LEVEL SORT, 534) - Reducer 5 <- Reducer 4 (GROUP, 582) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 534), Reducer 7 (PARTITION-LEVEL SORT, 534) + Reducer 3 <- Reducer 2 (GROUP, 582) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 438), Reducer 9 (PARTITION-LEVEL SORT, 438) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_promo_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 Map Operator Tree: TableScan alias: item @@ -133,7 +112,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 7 + Map 10 Map Operator Tree: TableScan alias: customer_demographics @@ -153,7 +132,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 232725 Data size: 89648269 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -173,65 +152,40 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_cdemo_sk (type: int), ss_promo_sk (type: int), ss_quantity (type: int), ss_list_price (type: decimal(7,2)), ss_sales_price (type: decimal(7,2)), ss_coupon_amt (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 3 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 9 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col12 + 1 _col3 (type: int) + outputColumnNames: _col1, _col8, _col9, _col10, _col11 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col12 (type: string) + aggregations: sum(_col8), count(_col8), sum(_col9), count(_col9), sum(_col11), count(_col11), sum(_col10), count(_col10) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -244,7 +198,7 @@ STAGE PLANS: Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -264,7 +218,7 @@ STAGE PLANS: Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -281,6 +235,52 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col3, _col6, _col7, _col8, _col9 + input vertices: + 0 Map 5 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query70.q.out b/ql/src/test/results/clientpositive/perf/spark/query70.q.out index e6780bbc2c..3d490857fc 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query70.q.out @@ -91,7 +91,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan alias: store @@ -106,8 +106,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -116,7 +116,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 9 Map Operator Tree: TableScan alias: store @@ -131,8 +131,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -140,24 +140,24 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Reducer 10 (GROUP, 481) - Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 241) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 561), Reducer 2 (PARTITION-LEVEL SORT, 561) - Reducer 4 <- Reducer 3 (GROUP, 1009) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 793) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) + Reducer 12 <- Reducer 11 (GROUP, 481) + Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 241) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) + Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 561), Reducer 3 (PARTITION-LEVEL SORT, 561) + Reducer 5 <- Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 793) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + filterExpr: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + predicate: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) @@ -171,7 +171,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -191,48 +191,48 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 2 Map Operator Tree: TableScan - alias: d1 - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: d1 + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_store_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Reducer 11 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -248,15 +248,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 input vertices: - 1 Map 14 + 0 Map 9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) - keys: _col5 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 @@ -268,7 +268,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)) - Reducer 11 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -284,7 +284,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 12 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -325,7 +325,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -341,31 +341,31 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col6, _col7 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5 input vertices: - 1 Map 8 + 0 Map 1 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col7 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col7 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col6 (type: string) - Reducer 3 + value expressions: _col1 (type: string), _col5 (type: decimal(7,2)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col7 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col6, _col7 + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: string), _col6 (type: string), _col2 (type: decimal(7,2)) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -382,7 +382,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 2299950717 Data size: 202902320028 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -402,7 +402,7 @@ STAGE PLANS: Map-reduce partition columns: (grouping(_col3, 1L) + grouping(_col3, 0L)) (type: bigint), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END (type: string) Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -441,7 +441,7 @@ STAGE PLANS: Statistics: Num rows: 1149975358 Data size: 101451159969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query71.q.out b/ql/src/test/results/clientpositive/perf/spark/query71.q.out index fac2025932..dd05a7dfe9 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query71.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query71.q.out @@ -96,37 +96,37 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 10 (PARTITION-LEVEL SORT, 154) - Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 943), Reducer 12 (PARTITION-LEVEL SORT, 943), Reducer 2 (PARTITION-LEVEL SORT, 943), Reducer 9 (PARTITION-LEVEL SORT, 943) - Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 305), Map 8 (PARTITION-LEVEL SORT, 305) + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 305), Map 9 (PARTITION-LEVEL SORT, 305) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1009), Reducer 7 (PARTITION-LEVEL SORT, 1009) + Reducer 3 <- Reducer 2 (GROUP, 1009) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 11 (PARTITION-LEVEL SORT, 154), Map 5 (PARTITION-LEVEL SORT, 154) + Reducer 7 <- Map 15 (PARTITION-LEVEL SORT, 943), Reducer 10 (PARTITION-LEVEL SORT, 943), Reducer 13 (PARTITION-LEVEL SORT, 943), Reducer 6 (PARTITION-LEVEL SORT, 943) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_time_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: time_dim + filterExpr: (t_time_sk is not null and (t_meal_time) IN ('breakfast', 'dinner')) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_time_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (t_time_sk is not null and (t_meal_time) IN ('breakfast', 'dinner')) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 10 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -146,7 +146,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan alias: store_sales @@ -167,7 +167,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -187,7 +187,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: item @@ -208,28 +208,28 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: string) Execution mode: vectorized - Map 15 + Map 5 Map Operator Tree: TableScan - alias: time_dim - filterExpr: (t_time_sk is not null and (t_meal_time) IN ('breakfast', 'dinner')) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_time_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (t_time_sk is not null and (t_meal_time) IN ('breakfast', 'dinner')) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_time_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: catalog_sales @@ -250,17 +250,17 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Reducer 12 + Reducer 10 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z @@ -268,17 +268,17 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) - Reducer 2 + Reducer 13 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z @@ -286,36 +286,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col4, _col5 - Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(7,2)), _col4 (type: int), _col5 (type: string) - Reducer 4 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col5, _col7, _col8 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col7, _col8 Statistics: Num rows: 1341632299 Data size: 145603835081 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0) - keys: _col4 (type: int), _col7 (type: int), _col8 (type: int), _col5 (type: string) + aggregations: sum(_col3) + keys: _col7 (type: int), _col1 (type: int), _col2 (type: int), _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -327,7 +310,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) Statistics: Num rows: 1341632299 Data size: 145603835081 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)) - Reducer 5 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -346,7 +329,7 @@ STAGE PLANS: sort order: -+ Statistics: Num rows: 670816149 Data size: 72801917486 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: int) - Reducer 6 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -360,17 +343,17 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: decimal(7,2)), _col2 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z @@ -378,6 +361,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1108786976 Data size: 120333742785 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(7,2)), _col2 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4, _col5 + Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1219665700 Data size: 132367119932 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(7,2)), _col4 (type: int), _col5 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index 3b8330986d..c2e8f8d1e2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 19 + Map 7 Map Operator Tree: TableScan alias: promotion @@ -105,8 +105,8 @@ STAGE PLANS: Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col15 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col15 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -115,7 +115,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 13 Map Operator Tree: TableScan alias: warehouse @@ -140,7 +140,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 16 + Map 19 Map Operator Tree: TableScan alias: household_demographics @@ -164,15 +164,15 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 308), Map 15 (PARTITION-LEVEL SORT, 308) - Reducer 14 <- Map 17 (PARTITION-LEVEL SORT, 370), Reducer 13 (PARTITION-LEVEL SORT, 370) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 90), Reducer 9 (PARTITION-LEVEL SORT, 90) + Reducer 10 <- Reducer 15 (PARTITION-LEVEL SORT, 412), Reducer 9 (PARTITION-LEVEL SORT, 412) + Reducer 11 <- Map 20 (PARTITION-LEVEL SORT, 150), Reducer 10 (PARTITION-LEVEL SORT, 150) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 370), Reducer 17 (PARTITION-LEVEL SORT, 370) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 308), Map 18 (PARTITION-LEVEL SORT, 308) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 90), Reducer 6 (PARTITION-LEVEL SORT, 90) Reducer 3 <- Reducer 2 (GROUP, 73) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 11 (PARTITION-LEVEL SORT, 6), Map 5 (PARTITION-LEVEL SORT, 6) - Reducer 7 <- Reducer 14 (PARTITION-LEVEL SORT, 412), Reducer 6 (PARTITION-LEVEL SORT, 412) - Reducer 8 <- Map 18 (PARTITION-LEVEL SORT, 150), Reducer 7 (PARTITION-LEVEL SORT, 150) - Reducer 9 <- Map 20 (PARTITION-LEVEL SORT, 66), Reducer 8 (PARTITION-LEVEL SORT, 66) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 66), Reducer 11 (PARTITION-LEVEL SORT, 66) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 6), Map 8 (PARTITION-LEVEL SORT, 6) #### A masked pattern was here #### Vertices: Map 1 @@ -195,17 +195,50 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan - alias: d2 - filterExpr: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + alias: inventory + filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null and inv_quantity_on_hand is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null and inv_quantity_on_hand is not null) (type: boolean) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col5 + input vertices: + 1 Map 13 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: string) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 14 + Map Operator Tree: + TableScan + alias: d3 + filterExpr: (d_date_sk is not null and UDFToDouble(d_date) is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + predicate: (d_date_sk is not null and UDFToDouble(d_date) is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_week_seq (type: int) + expressions: d_date_sk (type: int), UDFToDouble(d_date) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -214,9 +247,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col1 (type: double) Execution mode: vectorized - Map 12 + Map 16 Map Operator Tree: TableScan alias: catalog_sales @@ -237,7 +270,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) Execution mode: vectorized - Map 15 + Map 18 Map Operator Tree: TableScan alias: customer_demographics @@ -257,28 +290,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 17 - Map Operator Tree: - TableScan - alias: d3 - filterExpr: (d_date_sk is not null and UDFToDouble(d_date) is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and UDFToDouble(d_date) is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), UDFToDouble(d_date) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Execution mode: vectorized - Map 18 + Map 20 Map Operator Tree: TableScan alias: d1 @@ -299,7 +311,7 @@ STAGE PLANS: Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double) Execution mode: vectorized - Map 20 + Map 5 Map Operator Tree: TableScan alias: item @@ -320,40 +332,101 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 + Map 8 Map Operator Tree: TableScan - alias: inventory - filterExpr: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null and inv_quantity_on_hand is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + alias: d2 + filterExpr: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null and inv_quantity_on_hand is not null) (type: boolean) - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col5 - input vertices: - 1 Map 10 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col5 (type: string) + expressions: d_date_sk (type: int), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col1, _col5, _col7, _col9, _col10, _col14, _col15, _col16, _col17 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col5 < _col17) (type: boolean) + Statistics: Num rows: 140548651 Data size: 19033110805 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col10 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col10 (type: int) + Statistics: Num rows: 140548651 Data size: 19033110805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col7 (type: string), _col9 (type: double), _col14 (type: int), _col15 (type: int), _col16 (type: int) + Reducer 11 Local Work: Map Reduce Local Work - Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int), _col10 (type: int) + 1 _col1 (type: int), _col0 (type: int) + outputColumnNames: _col7, _col9, _col14, _col15, _col16, _col21, _col22 + Statistics: Num rows: 154603519 Data size: 20936422339 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col9 > _col22) (type: boolean) + Statistics: Num rows: 51534506 Data size: 6978807401 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: string), _col14 (type: int), _col15 (type: int), _col16 (type: int), _col21 (type: int) + outputColumnNames: _col7, _col14, _col15, _col16, _col21 + Statistics: Num rows: 51534506 Data size: 6978807401 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col15 (type: int) + outputColumnNames: _col0, _col8, _col15, _col17, _col22 + input vertices: + 0 Map 7 + Statistics: Num rows: 56687957 Data size: 7676688307 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col15 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col15 (type: int) + Statistics: Num rows: 56687957 Data size: 7676688307 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col8 (type: string), _col17 (type: int), _col22 (type: int) + Reducer 15 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6, _col7, _col8, _col9 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col6 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) + Reducer 17 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -373,7 +446,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7 input vertices: - 1 Map 16 + 1 Map 19 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -382,27 +455,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col11 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col11 (type: double), _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int) - outputColumnNames: _col1, _col2, _col6, _col7, _col8, _col9 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col6 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -471,80 +523,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col7 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: string), _col7 (type: int) - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col6 (type: int) - outputColumnNames: _col3, _col5, _col7, _col9, _col10, _col14, _col15, _col16, _col17 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 < _col17) (type: boolean) - Statistics: Num rows: 140548651 Data size: 19033110805 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: int), _col10 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col7 (type: int), _col10 (type: int) - Statistics: Num rows: 140548651 Data size: 19033110805 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col9 (type: double), _col14 (type: int), _col15 (type: int), _col16 (type: int) - Reducer 8 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col7 (type: int), _col10 (type: int) - 1 _col1 (type: int), _col0 (type: int) - outputColumnNames: _col5, _col9, _col14, _col15, _col16, _col21, _col22 - Statistics: Num rows: 154603519 Data size: 20936422339 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col9 > _col22) (type: boolean) - Statistics: Num rows: 51534506 Data size: 6978807401 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col15 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col14, _col16, _col21, _col23 - input vertices: - 1 Map 19 - Statistics: Num rows: 56687957 Data size: 7676688307 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col14 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col14 (type: int) - Statistics: Num rows: 56687957 Data size: 7676688307 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: string), _col16 (type: int), _col21 (type: int), _col23 (type: int) - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col14 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col14, _col16, _col21, _col23, _col25 + 1 _col15 (type: int) + outputColumnNames: _col1, _col2, _col10, _col17, _col19, _col24 Statistics: Num rows: 62356754 Data size: 8444357320 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col14 (type: int), _col16 (type: int), _col5 (type: string), _col25 (type: string), _col21 (type: int), _col23 (type: int) + expressions: _col17 (type: int), _col19 (type: int), _col10 (type: string), _col1 (type: string), _col24 (type: int), _col2 (type: int) outputColumnNames: _col4, _col6, _col13, _col15, _col19, _col25 Statistics: Num rows: 62356754 Data size: 8444357320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -554,6 +537,23 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int), _col6 (type: int) Statistics: Num rows: 62356754 Data size: 8444357320 Basic stats: COMPLETE Column stats: NONE value expressions: _col13 (type: string), _col15 (type: string), _col19 (type: int), _col25 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3, _col5, _col7 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: int), _col7 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query73.q.out b/ql/src/test/results/clientpositive/perf/spark/query73.q.out index 2a7bc90c60..12a88afcd7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query73.q.out @@ -75,42 +75,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 3 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (false) END) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (false) END) (type: boolean) - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + predicate: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 8 Map Operator Tree: TableScan - alias: store - filterExpr: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (false) END) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (s_store_sk is not null and (s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_vehicle_count > 0) and hd_demo_sk is not null and (hd_buy_potential) IN ('>10000', 'unknown') and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (false) END) (type: boolean) + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -119,11 +119,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) - Reducer 5 <- Reducer 4 (GROUP, 529) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 6 <- Reducer 5 (GROUP, 529) #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: store_sales @@ -144,7 +144,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -164,7 +164,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 4 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -184,21 +184,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col4 input vertices: - 1 Map 7 + 1 Map 8 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col2, _col5 input vertices: - 1 Map 8 + 0 Map 3 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() - keys: _col1 (type: int), _col4 (type: int) + keys: _col2 (type: int), _col5 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -210,7 +210,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -260,7 +260,7 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 input vertices: - 1 Reducer 5 + 1 Reducer 6 Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: int), _col7 (type: bigint) diff --git a/ql/src/test/results/clientpositive/perf/spark/query74.q.out b/ql/src/test/results/clientpositive/perf/spark/query74.q.out index 8a3c9d564b..b009ca7159 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query74.q.out @@ -136,23 +136,45 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) - Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) - Reducer 12 <- Reducer 11 (GROUP, 481) - Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) - Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) - Reducer 18 <- Reducer 17 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) - Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 398), Map 25 (PARTITION-LEVEL SORT, 398) - Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 975), Reducer 22 (PARTITION-LEVEL SORT, 975) - Reducer 24 <- Reducer 23 (GROUP, 481) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) - Reducer 4 <- Reducer 3 (GROUP, 186) - Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 444), Reducer 18 (PARTITION-LEVEL SORT, 444), Reducer 24 (PARTITION-LEVEL SORT, 444), Reducer 4 (PARTITION-LEVEL SORT, 444) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 706), Reducer 14 (PARTITION-LEVEL SORT, 706) + Reducer 11 <- Reducer 10 (GROUP, 186) + Reducer 12 <- Reducer 11 (PARTITION-LEVEL SORT, 204), Reducer 18 (PARTITION-LEVEL SORT, 204), Reducer 24 (PARTITION-LEVEL SORT, 204) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 975), Reducer 20 (PARTITION-LEVEL SORT, 975) + Reducer 18 <- Reducer 17 (GROUP, 481) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Reducer 7 (PARTITION-LEVEL SORT, 975) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 398), Map 21 (PARTITION-LEVEL SORT, 398) + Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 706), Reducer 26 (PARTITION-LEVEL SORT, 706) + Reducer 24 <- Reducer 23 (GROUP, 186) + Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 154), Map 27 (PARTITION-LEVEL SORT, 154) + Reducer 3 <- Reducer 2 (GROUP, 481) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 417), Reducer 3 (PARTITION-LEVEL SORT, 417) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: customer + filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: web_sales @@ -173,14 +195,14 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -193,7 +215,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 14 + Map 16 Map Operator Tree: TableScan alias: customer @@ -214,28 +236,28 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 15 + Map 19 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_net_paid (type: decimal(7,2)) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 19 + Map 21 Map Operator Tree: TableScan alias: date_dim @@ -255,7 +277,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 20 + Map 22 Map Operator Tree: TableScan alias: customer @@ -276,35 +298,35 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 21 + Map 25 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 25 + Map 27 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) + predicate: ((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) @@ -317,28 +339,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 26 + Map 6 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_customer_sk is not null and c_customer_id is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -358,7 +380,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer @@ -379,28 +401,70 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_net_paid (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) - Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col6 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col6) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) + Reducer 11 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: decimal(7,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(7,2)) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col5, _col6 + Statistics: Num rows: 255550078 Data size: 22544702094 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 255550078 Data size: 22544702094 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col5 (type: decimal(7,2)), _col6 (type: boolean) + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -409,27 +473,27 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 11 + Reducer 17 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col6 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string) + aggregations: max(_col6) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -441,7 +505,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)) - Reducer 12 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -468,7 +532,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)) - Reducer 16 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col6 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col6) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) + Reducer 20 Reduce Operator Tree: Join Operator condition map: @@ -477,27 +565,27 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 17 + Reducer 23 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3, _col6 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string) + aggregations: max(_col6) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -509,7 +597,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)) - Reducer 18 + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -536,7 +624,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(7,2)), _col2 (type: boolean) - Reducer 2 + Reducer 26 Reduce Operator Tree: Join Operator condition map: @@ -553,48 +641,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)) - Reducer 22 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 23 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) - Reducer 24 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -610,84 +657,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col2) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) Reducer 4 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: decimal(7,2)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(7,2)) - Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 1 to 3 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - 3 _col0 (type: string) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1149975359 Data size: 101451160012 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col7, _col9, _col10 + Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col10 / _col3))) ELSE (false) END) ELSE (false) END (type: boolean) - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + predicate: CASE WHEN (_col7 is not null) THEN (CASE WHEN (_col10) THEN (((_col5 / _col9) > (_col3 / _col7))) ELSE (false) END) ELSE (false) END (type: boolean) + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: string), _col8 (type: string), _col9 (type: string) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE @@ -698,6 +697,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index 9f723523ac..b4e619b179 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -162,54 +162,75 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 400), Map 16 (PARTITION-LEVEL SORT, 400) - Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 438), Reducer 13 (PARTITION-LEVEL SORT, 438) - Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 516), Reducer 14 (PARTITION-LEVEL SORT, 516) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 308), Map 16 (PARTITION-LEVEL SORT, 308) - Reducer 20 <- Map 16 (PARTITION-LEVEL SORT, 156), Map 19 (PARTITION-LEVEL SORT, 156) - Reducer 21 <- Map 10 (PARTITION-LEVEL SORT, 169), Reducer 20 (PARTITION-LEVEL SORT, 169) - Reducer 22 <- Map 25 (PARTITION-LEVEL SORT, 196), Reducer 21 (PARTITION-LEVEL SORT, 196) - Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 308), Map 32 (PARTITION-LEVEL SORT, 308) - Reducer 28 <- Map 33 (PARTITION-LEVEL SORT, 336), Reducer 27 (PARTITION-LEVEL SORT, 336) - Reducer 29 <- Map 34 (PARTITION-LEVEL SORT, 393), Reducer 28 (PARTITION-LEVEL SORT, 393) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 336), Reducer 2 (PARTITION-LEVEL SORT, 336) - Reducer 30 <- Reducer 29 (GROUP, 934), Reducer 38 (GROUP, 934) - Reducer 31 <- Reducer 30 (GROUP PARTITION-LEVEL SORT, 671), Reducer 45 (GROUP PARTITION-LEVEL SORT, 671) - Reducer 36 <- Map 32 (PARTITION-LEVEL SORT, 400), Map 35 (PARTITION-LEVEL SORT, 400) - Reducer 37 <- Map 33 (PARTITION-LEVEL SORT, 438), Reducer 36 (PARTITION-LEVEL SORT, 438) - Reducer 38 <- Map 41 (PARTITION-LEVEL SORT, 516), Reducer 37 (PARTITION-LEVEL SORT, 516) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 393), Reducer 3 (PARTITION-LEVEL SORT, 393) - Reducer 43 <- Map 32 (PARTITION-LEVEL SORT, 156), Map 42 (PARTITION-LEVEL SORT, 156) - Reducer 44 <- Map 33 (PARTITION-LEVEL SORT, 169), Reducer 43 (PARTITION-LEVEL SORT, 169) - Reducer 45 <- Map 48 (PARTITION-LEVEL SORT, 196), Reducer 44 (PARTITION-LEVEL SORT, 196) - Reducer 5 <- Reducer 15 (GROUP, 934), Reducer 4 (GROUP, 934) - Reducer 6 <- Reducer 22 (GROUP PARTITION-LEVEL SORT, 671), Reducer 5 (GROUP PARTITION-LEVEL SORT, 671) - Reducer 7 <- Reducer 31 (PARTITION-LEVEL SORT, 336), Reducer 6 (PARTITION-LEVEL SORT, 336) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 516), Reducer 16 (PARTITION-LEVEL SORT, 516) + Reducer 15 <- Map 10 (PARTITION-LEVEL SORT, 400), Map 14 (PARTITION-LEVEL SORT, 400) + Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 438), Reducer 15 (PARTITION-LEVEL SORT, 438) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 393), Reducer 9 (PARTITION-LEVEL SORT, 393) + Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 196), Reducer 23 (PARTITION-LEVEL SORT, 196) + Reducer 22 <- Map 10 (PARTITION-LEVEL SORT, 156), Map 21 (PARTITION-LEVEL SORT, 156) + Reducer 23 <- Map 11 (PARTITION-LEVEL SORT, 169), Reducer 22 (PARTITION-LEVEL SORT, 169) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 393), Reducer 32 (PARTITION-LEVEL SORT, 393) + Reducer 28 <- Reducer 27 (GROUP, 934), Reducer 36 (GROUP, 934) + Reducer 29 <- Reducer 28 (GROUP PARTITION-LEVEL SORT, 671), Reducer 43 (GROUP PARTITION-LEVEL SORT, 671) + Reducer 3 <- Reducer 13 (GROUP, 934), Reducer 2 (GROUP, 934) + Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 308), Map 33 (PARTITION-LEVEL SORT, 308) + Reducer 32 <- Map 34 (PARTITION-LEVEL SORT, 336), Reducer 31 (PARTITION-LEVEL SORT, 336) + Reducer 36 <- Map 35 (PARTITION-LEVEL SORT, 516), Reducer 39 (PARTITION-LEVEL SORT, 516) + Reducer 38 <- Map 33 (PARTITION-LEVEL SORT, 400), Map 37 (PARTITION-LEVEL SORT, 400) + Reducer 39 <- Map 34 (PARTITION-LEVEL SORT, 438), Reducer 38 (PARTITION-LEVEL SORT, 438) + Reducer 4 <- Reducer 20 (GROUP PARTITION-LEVEL SORT, 671), Reducer 3 (GROUP PARTITION-LEVEL SORT, 671) + Reducer 43 <- Map 42 (PARTITION-LEVEL SORT, 196), Reducer 46 (PARTITION-LEVEL SORT, 196) + Reducer 45 <- Map 33 (PARTITION-LEVEL SORT, 156), Map 44 (PARTITION-LEVEL SORT, 156) + Reducer 46 <- Map 34 (PARTITION-LEVEL SORT, 169), Reducer 45 (PARTITION-LEVEL SORT, 169) + Reducer 5 <- Reducer 29 (PARTITION-LEVEL SORT, 336), Reducer 4 (PARTITION-LEVEL SORT, 336) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 308), Map 7 (PARTITION-LEVEL SORT, 308) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 336), Reducer 8 (PARTITION-LEVEL SORT, 336) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)) + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 10 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null and (i_category = 'Sports')) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null and (i_category = 'Sports')) (type: boolean) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 10 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -229,28 +250,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2)) + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 14 Map Operator Tree: TableScan alias: store_sales @@ -271,49 +292,28 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 16 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null and (i_category = 'Sports')) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null and (i_category = 'Sports')) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) - Execution mode: vectorized - Map 18 + Map 19 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: web_returns + filterExpr: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2)) + expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 19 + Map 21 Map Operator Tree: TableScan alias: web_sales @@ -334,28 +334,28 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 25 + Map 26 Map Operator Tree: TableScan - alias: web_returns - filterExpr: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) + expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 26 + Map 30 Map Operator Tree: TableScan alias: catalog_sales @@ -376,7 +376,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 32 + Map 33 Map Operator Tree: TableScan alias: item @@ -397,7 +397,7 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Map 33 + Map 34 Map Operator Tree: TableScan alias: date_dim @@ -417,28 +417,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 34 + Map 35 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_order_number is not null and cr_item_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_item_sk (type: int), cr_order_number (type: int), cr_return_quantity (type: int), cr_return_amount (type: decimal(7,2)) + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 35 + Map 37 Map Operator Tree: TableScan alias: store_sales @@ -459,28 +459,28 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 41 + Map 42 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: web_returns + filterExpr: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_ticket_number is not null and sr_item_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int), sr_return_amt (type: decimal(7,2)) + expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 42 + Map 44 Map Operator Tree: TableScan alias: web_sales @@ -501,28 +501,51 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Map 48 + Map 7 Map Operator Tree: TableScan - alias: web_returns - filterExpr: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wr_order_number is not null and wr_item_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + outputColumnNames: _col2, _col3, _col7, _col8, _col10, _col11, _col12, _col13 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col10 (type: int), _col11 (type: int), _col12 (type: int), _col13 (type: int), (_col7 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: int), (_col8 - CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END) (type: decimal(8,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) + null sort order: zzzzzz + sort order: ++++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) + Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -539,7 +562,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 14 + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -556,17 +579,17 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 15 + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Right Outer Join 0 to 1 + outputColumnNames: _col2, _col3, _col7, _col8, _col10, _col11, _col12, _col13 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col10 (type: int), _col11 (type: int), _col12 (type: int), _col13 (type: int), (_col7 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: int), (_col8 - CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) minReductionHashAggr: 0.99 @@ -579,66 +602,15 @@ STAGE PLANS: sort order: ++++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Reducer 20 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 21 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 22 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 + Right Outer Join 0 to 1 + outputColumnNames: _col2, _col3, _col7, _col8, _col10, _col11, _col12, _col13 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col10 (type: int), _col11 (type: int), _col12 (type: int), _col13 (type: int), (_col7 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: int), (_col8 - CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -653,7 +625,7 @@ STAGE PLANS: sort order: ++++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE - Reducer 27 + Reducer 22 Reduce Operator Tree: Join Operator condition map: @@ -662,15 +634,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 28 + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -679,23 +651,23 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 29 + Reducer 27 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 + Right Outer Join 0 to 1 + outputColumnNames: _col2, _col3, _col7, _col8, _col10, _col11, _col12, _col13 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col10 (type: int), _col11 (type: int), _col12 (type: int), _col13 (type: int), (_col7 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: int), (_col8 - CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -710,24 +682,7 @@ STAGE PLANS: sort order: ++++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 30 + Reducer 28 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -747,7 +702,7 @@ STAGE PLANS: sort order: ++++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE - Reducer 31 + Reducer 29 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -768,7 +723,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: decimal(18,2)) - Reducer 36 + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 574982367 Data size: 59771294782 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) + null sort order: zzzzzz + sort order: ++++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE + Reducer 31 Reduce Operator Tree: Join Operator condition map: @@ -777,15 +752,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 37 + Reducer 32 Reduce Operator Tree: Join Operator condition map: @@ -794,23 +769,23 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 38 + Reducer 36 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 + Right Outer Join 0 to 1 + outputColumnNames: _col2, _col3, _col7, _col8, _col10, _col11, _col12, _col13 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col10 (type: int), _col11 (type: int), _col12 (type: int), _col13 (type: int), (_col7 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: int), (_col8 - CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -825,30 +800,7 @@ STAGE PLANS: sort order: ++++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) - null sort order: zzzzzz - sort order: ++++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) - Statistics: Num rows: 1149964734 Data size: 119542589565 Basic stats: COMPLETE Column stats: NONE - Reducer 43 + Reducer 38 Reduce Operator Tree: Join Operator condition map: @@ -857,15 +809,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 44 + Reducer 39 Reduce Operator Tree: Join Operator condition map: @@ -874,23 +826,44 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) null sort order: zz sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) - Reducer 45 + Reducer 4 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(8,2)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 383324964 Data size: 42916270092 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), sum(_col5) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + null sort order: zzzz + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: decimal(18,2)) + Reducer 43 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - outputColumnNames: _col3, _col4, _col6, _col7, _col8, _col9, _col13, _col14 + Right Outer Join 0 to 1 + outputColumnNames: _col2, _col3, _col7, _col8, _col10, _col11, _col12, _col13 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), (_col3 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col14 is not null) THEN (_col14) ELSE (0) END) (type: decimal(8,2)) + expressions: _col10 (type: int), _col11 (type: int), _col12 (type: int), _col13 (type: int), (_col7 - CASE WHEN (_col2 is not null) THEN (_col2) ELSE (0) END) (type: int), (_col8 - CASE WHEN (_col3 is not null) THEN (_col3) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -905,48 +878,41 @@ STAGE PLANS: sort order: ++++++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Execution mode: vectorized + Reducer 45 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(8,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 574982367 Data size: 59771294782 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(8,2)) - null sort order: zzzzzz - sort order: ++++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 766649929 Data size: 85832540296 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Execution mode: vectorized + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) + Reducer 46 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: decimal(8,2)) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 383324964 Data size: 42916270092 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4), sum(_col5) - keys: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) - null sort order: zzzz - sort order: ++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 191662482 Data size: 21458135046 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: bigint), _col5 (type: decimal(18,2)) - Reducer 7 + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -970,7 +936,7 @@ STAGE PLANS: Statistics: Num rows: 70276244 Data size: 7867982946 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col7 (type: decimal(19,2)) - Reducer 8 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -991,6 +957,40 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query76.q.out b/ql/src/test/results/clientpositive/perf/spark/query76.q.out index f70956f7fd..4ccad49378 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query76.q.out @@ -64,17 +64,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 85), Reducer 9 (PARTITION-LEVEL SORT, 85) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 158), Map 16 (PARTITION-LEVEL SORT, 158) - Reducer 15 <- Map 17 (PARTITION-LEVEL SORT, 169), Reducer 14 (PARTITION-LEVEL SORT, 169) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 204), Map 6 (PARTITION-LEVEL SORT, 204) - Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 219), Reducer 2 (PARTITION-LEVEL SORT, 219) - Reducer 4 <- Reducer 10 (GROUP, 518), Reducer 15 (GROUP, 518), Reducer 3 (GROUP, 518) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 82), Map 11 (PARTITION-LEVEL SORT, 82) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 82), Map 12 (PARTITION-LEVEL SORT, 82) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 169), Reducer 16 (PARTITION-LEVEL SORT, 169) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 158), Map 17 (PARTITION-LEVEL SORT, 158) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 219), Reducer 6 (PARTITION-LEVEL SORT, 219) + Reducer 3 <- Reducer 14 (GROUP, 518), Reducer 2 (GROUP, 518), Reducer 9 (GROUP, 518) + Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 6 <- Map 10 (PARTITION-LEVEL SORT, 204), Map 7 (PARTITION-LEVEL SORT, 204) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 85), Reducer 11 (PARTITION-LEVEL SORT, 85) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: item @@ -95,7 +116,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 11 + Map 12 Map Operator Tree: TableScan alias: web_sales @@ -116,7 +137,7 @@ STAGE PLANS: Statistics: Num rows: 72001334 Data size: 9790099106 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -137,7 +158,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: item @@ -158,7 +179,7 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 16 + Map 17 Map Operator Tree: TableScan alias: catalog_sales @@ -179,28 +200,7 @@ STAGE PLANS: Statistics: Num rows: 143994918 Data size: 19499804476 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 17 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_year (type: int), d_qoy (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -221,33 +221,7 @@ STAGE PLANS: Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - outputColumnNames: _col1, _col4, _col6, _col7 - Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 'web' (type: string), 'ws_web_page_sk' (type: string), _col6 (type: int), _col7 (type: int), _col1 (type: string), _col4 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(_col5) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - null sort order: zzzzz - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) - Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) - Reducer 14 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -256,23 +230,23 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col4 (type: decimal(7,2)) - Reducer 15 + Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - outputColumnNames: _col1, _col4, _col6, _col7 + outputColumnNames: _col1, _col2, _col4, _col7 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'catalog' (type: string), 'cs_warehouse_sk' (type: string), _col6 (type: int), _col7 (type: int), _col1 (type: string), _col4 (type: decimal(7,2)) + expressions: 'catalog' (type: string), 'cs_warehouse_sk' (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col7 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -290,7 +264,7 @@ STAGE PLANS: Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) - Reducer 2 + Reducer 16 Reduce Operator Tree: Join Operator condition map: @@ -299,23 +273,23 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 316797605 Data size: 27947976704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 316797605 Data size: 27947976704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col4 (type: decimal(7,2)) - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - outputColumnNames: _col1, _col4, _col6, _col7 + outputColumnNames: _col1, _col2, _col4, _col7 Statistics: Num rows: 348477373 Data size: 30742775040 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 'store' (type: string), 'ss_addr_sk' (type: string), _col6 (type: int), _col7 (type: int), _col1 (type: string), _col4 (type: decimal(7,2)) + expressions: 'store' (type: string), 'ss_addr_sk' (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col7 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 348477373 Data size: 30742775040 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -333,7 +307,7 @@ STAGE PLANS: Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -349,7 +323,7 @@ STAGE PLANS: Statistics: Num rows: 304916424 Data size: 33091779954 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -366,7 +340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -375,14 +349,40 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316797605 Data size: 27947976704 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316797605 Data size: 27947976704 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col4 (type: decimal(7,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2, _col4, _col7 + Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'web' (type: string), 'ws_web_page_sk' (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col7 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col5) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: string) + Statistics: Num rows: 609832848 Data size: 66183559909 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: bigint), _col6 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query77.q.out b/ql/src/test/results/clientpositive/perf/spark/query77.q.out index bd4d2b61cf..0b8469499e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query77.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[182][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[186][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain with ss as (select s_store_sk, @@ -247,7 +247,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan alias: store @@ -262,8 +262,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -272,7 +272,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 9 Map Operator Tree: TableScan alias: store @@ -287,8 +287,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -405,7 +405,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 25 + Map 20 Map Operator Tree: TableScan alias: web_page @@ -420,8 +420,8 @@ STAGE PLANS: Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -430,7 +430,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 30 + Map 26 Map Operator Tree: TableScan alias: web_page @@ -445,8 +445,8 @@ STAGE PLANS: Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -454,43 +454,43 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 35), Map 9 (PARTITION-LEVEL SORT, 35) - Reducer 11 <- Reducer 10 (GROUP, 43) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 35), Map 13 (PARTITION-LEVEL SORT, 35) + Reducer 12 <- Reducer 11 (GROUP, 43) Reducer 15 <- Map 14 (GROUP, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 21 <- Map 12 (PARTITION-LEVEL SORT, 11), Map 20 (PARTITION-LEVEL SORT, 11) - Reducer 22 <- Reducer 21 (GROUP, 13) - Reducer 23 <- Reducer 22 (PARTITION-LEVEL SORT, 99), Reducer 28 (PARTITION-LEVEL SORT, 99) - Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 154), Map 29 (PARTITION-LEVEL SORT, 154) - Reducer 28 <- Reducer 27 (GROUP, 186) - Reducer 3 <- Reducer 2 (GROUP, 481) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 262), Reducer 3 (PARTITION-LEVEL SORT, 262) - Reducer 5 <- Reducer 15 (GROUP, 1009), Reducer 23 (GROUP, 1009), Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 22 <- Map 13 (PARTITION-LEVEL SORT, 11), Map 21 (PARTITION-LEVEL SORT, 11) + Reducer 23 <- Reducer 22 (GROUP, 13) + Reducer 24 <- Reducer 23 (PARTITION-LEVEL SORT, 99), Reducer 29 (PARTITION-LEVEL SORT, 99) + Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 154), Map 30 (PARTITION-LEVEL SORT, 154) + Reducer 29 <- Reducer 28 (GROUP, 186) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) + Reducer 4 <- Reducer 3 (GROUP, 481) + Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 262), Reducer 4 (PARTITION-LEVEL SORT, 262) + Reducer 6 <- Reducer 15 (GROUP, 1009), Reducer 24 (GROUP, 1009), Reducer 5 (GROUP, 1009) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + expressions: sr_returned_date_sk (type: int), sr_store_sk (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -550,7 +550,28 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 20 + Map 2 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 21 Map Operator Tree: TableScan alias: web_returns @@ -571,7 +592,7 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 26 + Map 27 Map Operator Tree: TableScan alias: web_sales @@ -592,7 +613,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 29 + Map 30 Map Operator Tree: TableScan alias: date_dim @@ -612,7 +633,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -632,28 +653,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_returned_date_sk (type: int), sr_store_sk (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) - Execution mode: vectorized - Reducer 10 + Reducer 11 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -669,15 +669,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col3, _col4 input vertices: - 1 Map 13 + 0 Map 9 Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col5 (type: int) + aggregations: sum(_col3), sum(_col4) + keys: _col0 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -689,7 +689,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 11 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -745,43 +745,7 @@ STAGE PLANS: Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 - input vertices: - 1 Map 8 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col5 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 21 + Reducer 22 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -797,15 +761,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col3, _col4 input vertices: - 1 Map 25 + 0 Map 20 Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col5 (type: int) + aggregations: sum(_col3), sum(_col4) + keys: _col0 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -817,7 +781,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 22 + Reducer 23 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -833,7 +797,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 23 + Reducer 24 Reduce Operator Tree: Join Operator condition map: @@ -859,7 +823,7 @@ STAGE PLANS: Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 27 + Reducer 28 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -875,15 +839,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col3, _col4 input vertices: - 1 Map 30 + 0 Map 26 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), sum(_col3) - keys: _col5 (type: int) + aggregations: sum(_col3), sum(_col4) + keys: _col0 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -895,7 +859,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 28 + Reducer 29 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -912,6 +876,42 @@ STAGE PLANS: Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) Reducer 3 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col3, _col4 + input vertices: + 0 Map 1 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3), sum(_col4) + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -927,7 +927,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -953,7 +953,7 @@ STAGE PLANS: Statistics: Num rows: 1912659936 Data size: 311808612993 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -974,7 +974,7 @@ STAGE PLANS: Statistics: Num rows: 956329968 Data size: 155904306496 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out index d47eb0f727..0b32cab116 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out @@ -64,43 +64,43 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 4 Map Operator Tree: TableScan - alias: store - filterExpr: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + predicate: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int), s_city (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 + Map 5 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((hd_dep_count = 8) or (hd_vehicle_count > 0)) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE + predicate: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6000 Data size: 642000 Basic stats: COMPLETE Column stats: NONE + expressions: s_store_sk (type: int), s_city (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -108,10 +108,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 802), Reducer 6 (PARTITION-LEVEL SORT, 802) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 802), Reducer 8 (PARTITION-LEVEL SORT, 802) Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 6 <- Reducer 5 (GROUP, 529) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 8 <- Reducer 7 (GROUP, 529) #### A masked pattern was here #### Vertices: Map 1 @@ -135,7 +135,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: store_sales @@ -156,7 +156,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: date_dim @@ -214,7 +214,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -230,25 +230,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col6, _col7, _col10 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col3, _col4, _col5, _col7, _col8, _col9 input vertices: - 1 Map 8 + 0 Map 5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col5, _col6, _col7, _col10 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col2, _col4, _col6, _col8, _col9, _col10 input vertices: - 1 Map 9 + 0 Map 4 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col6), sum(_col7) - keys: _col1 (type: int), _col10 (type: string), _col3 (type: int), _col5 (type: int) + aggregations: sum(_col9), sum(_col10) + keys: _col4 (type: int), _col2 (type: string), _col6 (type: int), _col8 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -260,7 +260,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)) - Reducer 6 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query80.q.out b/ql/src/test/results/clientpositive/perf/spark/query80.q.out index f54832e3ea..accdaa4a74 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query80.q.out @@ -228,7 +228,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 1 Map Operator Tree: TableScan alias: store @@ -243,8 +243,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -253,7 +253,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 12 Map Operator Tree: TableScan alias: date_dim @@ -278,7 +278,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 21 + Map 23 Map Operator Tree: TableScan alias: date_dim @@ -303,7 +303,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 33 + Map 24 Map Operator Tree: TableScan alias: web_site @@ -318,8 +318,8 @@ STAGE PLANS: Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -328,7 +328,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 31 + Map 33 Map Operator Tree: TableScan alias: date_dim @@ -352,45 +352,86 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 329), Map 19 (PARTITION-LEVEL SORT, 329) - Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 336), Reducer 14 (PARTITION-LEVEL SORT, 336) - Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 408), Reducer 15 (PARTITION-LEVEL SORT, 408) - Reducer 17 <- Map 23 (PARTITION-LEVEL SORT, 447), Reducer 16 (PARTITION-LEVEL SORT, 447) - Reducer 18 <- Reducer 17 (GROUP, 491) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 432), Map 8 (PARTITION-LEVEL SORT, 432) - Reducer 25 <- Map 24 (PARTITION-LEVEL SORT, 164), Map 29 (PARTITION-LEVEL SORT, 164) - Reducer 26 <- Map 30 (PARTITION-LEVEL SORT, 169), Reducer 25 (PARTITION-LEVEL SORT, 169) - Reducer 27 <- Map 32 (PARTITION-LEVEL SORT, 206), Reducer 26 (PARTITION-LEVEL SORT, 206) - Reducer 28 <- Reducer 27 (GROUP, 247) - Reducer 3 <- Map 20 (PARTITION-LEVEL SORT, 437), Reducer 2 (PARTITION-LEVEL SORT, 437) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 531), Reducer 3 (PARTITION-LEVEL SORT, 531) - Reducer 5 <- Reducer 4 (GROUP, 640) - Reducer 6 <- Reducer 18 (GROUP, 1009), Reducer 28 (GROUP, 1009), Reducer 5 (GROUP, 1009) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 447), Reducer 17 (PARTITION-LEVEL SORT, 447) + Reducer 15 <- Reducer 14 (GROUP, 491) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 408), Reducer 20 (PARTITION-LEVEL SORT, 408) + Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 329), Map 21 (PARTITION-LEVEL SORT, 329) + Reducer 20 <- Map 11 (PARTITION-LEVEL SORT, 336), Reducer 19 (PARTITION-LEVEL SORT, 336) + Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 206), Reducer 30 (PARTITION-LEVEL SORT, 206) + Reducer 27 <- Reducer 26 (GROUP, 247) + Reducer 29 <- Map 28 (PARTITION-LEVEL SORT, 164), Map 31 (PARTITION-LEVEL SORT, 164) + Reducer 3 <- Map 16 (PARTITION-LEVEL SORT, 531), Reducer 9 (PARTITION-LEVEL SORT, 531) + Reducer 30 <- Map 32 (PARTITION-LEVEL SORT, 169), Reducer 29 (PARTITION-LEVEL SORT, 169) + Reducer 4 <- Reducer 3 (GROUP, 640) + Reducer 5 <- Reducer 15 (GROUP, 1009), Reducer 27 (GROUP, 1009), Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 432), Map 7 (PARTITION-LEVEL SORT, 432) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 437), Reducer 8 (PARTITION-LEVEL SORT, 437) #### A masked pattern was here #### Vertices: - Map 1 + Map 10 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: store_returns + filterExpr: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col4 (type: int) + key expressions: _col0 (type: int), _col1 (type: int) null sort order: zz sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col4 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized Map 11 + Map Operator Tree: + TableScan + alias: promotion + filterExpr: (p_promo_sk is not null and (p_channel_tv = 'N')) (type: boolean) + Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_promo_sk is not null and (p_channel_tv = 'N')) (type: boolean) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_promo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 13 + Map Operator Tree: + TableScan + alias: catalog_page + filterExpr: cp_catalog_page_sk is not null (type: boolean) + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cp_catalog_page_sk is not null (type: boolean) + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Map 16 Map Operator Tree: TableScan alias: item @@ -410,7 +451,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 13 + Map 18 Map Operator Tree: TableScan alias: catalog_sales @@ -431,7 +472,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 19 + Map 21 Map Operator Tree: TableScan alias: catalog_returns @@ -452,48 +493,27 @@ STAGE PLANS: Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 20 + Map 25 Map Operator Tree: TableScan - alias: promotion - filterExpr: (p_promo_sk is not null and (p_channel_tv = 'N')) (type: boolean) - Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE + alias: item + filterExpr: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_promo_sk is not null and (p_channel_tv = 'N')) (type: boolean) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE + predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_promo_sk (type: int) + expressions: i_item_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 23 - Map Operator Tree: - TableScan - alias: catalog_page - filterExpr: cp_catalog_page_sk is not null (type: boolean) - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: cp_catalog_page_sk is not null (type: boolean) - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cp_catalog_page_sk (type: int), cp_catalog_page_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 46000 Data size: 21198808 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 24 + Map 28 Map Operator Tree: TableScan alias: web_sales @@ -514,7 +534,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized - Map 29 + Map 31 Map Operator Tree: TableScan alias: web_returns @@ -535,7 +555,7 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 30 + Map 32 Map Operator Tree: TableScan alias: promotion @@ -555,122 +575,39 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1150 Data size: 1356710 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 32 - Map Operator Tree: - TableScan - alias: item - filterExpr: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((i_current_price > 50) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 154000 Data size: 221186819 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan - alias: store_returns - filterExpr: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (sr_item_sk is not null and sr_ticket_number is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_promo_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: sr_item_sk (type: int), sr_ticket_number (type: int), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_promo_sk (type: int), ss_ticket_number (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) + key expressions: _col1 (type: int), _col4 (type: int) null sort order: zz sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) + Map-reduce partition columns: _col1 (type: int), _col4 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Execution mode: vectorized Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col2 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 15 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 - input vertices: - 1 Map 21 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 16 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col5, _col6, _col9, _col10 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 17 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col15 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col8, _col9, _col12, _col13 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(7,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(8,2)) + expressions: _col1 (type: string), _col8 (type: decimal(7,2)), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(7,2)), (_col9 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -687,7 +624,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(18,2)) - Reducer 18 + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -715,41 +652,41 @@ STAGE PLANS: Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 2 + Reducer 17 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col6, _col7, _col10, _col11 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 25 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + Reducer 19 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 keys: - 0 _col1 (type: int), _col4 (type: int) + 0 _col2 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 26 + Reducer 20 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -760,7 +697,7 @@ STAGE PLANS: 0 _col3 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -769,16 +706,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 input vertices: - 1 Map 31 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + 1 Map 23 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 27 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 26 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -786,22 +723,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col9, _col10 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col6, _col7, _col10, _col11 Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col15 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col8, _col9, _col12, _col13 input vertices: - 1 Map 33 + 0 Map 24 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(7,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(8,2)) + expressions: _col1 (type: string), _col8 (type: decimal(7,2)), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(7,2)), (_col9 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -818,7 +755,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(18,2)) - Reducer 28 + Reducer 27 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -846,36 +783,24 @@ STAGE PLANS: Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 3 - Local Work: - Map Reduce Local Work + Reducer 29 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 - input vertices: - 1 Map 10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) - Reducer 4 + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -883,22 +808,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6, _col9, _col10 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col3, _col6, _col7, _col10, _col11 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col9, _col10, _col15 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col8, _col9, _col12, _col13 input vertices: - 1 Map 12 + 0 Map 1 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col15 (type: string), _col5 (type: decimal(7,2)), CASE WHEN (_col9 is not null) THEN (_col9) ELSE (0) END (type: decimal(7,2)), (_col6 - CASE WHEN (_col10 is not null) THEN (_col10) ELSE (0) END) (type: decimal(8,2)) + expressions: _col1 (type: string), _col8 (type: decimal(7,2)), CASE WHEN (_col12 is not null) THEN (_col12) ELSE (0) END (type: decimal(7,2)), (_col9 - CASE WHEN (_col13 is not null) THEN (_col13) ELSE (0) END) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -915,7 +840,36 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 927646829 Data size: 81837272625 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(18,2)) - Reducer 5 + Reducer 30 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 + input vertices: + 1 Map 33 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -943,7 +897,7 @@ STAGE PLANS: Statistics: Num rows: 2435062716 Data size: 264270971781 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -964,7 +918,7 @@ STAGE PLANS: Statistics: Num rows: 1217531358 Data size: 132135485890 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2)) - Reducer 7 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -981,6 +935,52 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col9, _col10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Reducer 9 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col5, _col6, _col9, _col10 + input vertices: + 1 Map 12 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query81.q.out b/ql/src/test/results/clientpositive/perf/spark/query81.q.out index 8845e759ee..24695a2f73 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query81.q.out @@ -76,16 +76,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 16 (PARTITION-LEVEL SORT, 262), Reducer 9 (PARTITION-LEVEL SORT, 262) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 25), Map 17 (PARTITION-LEVEL SORT, 25) - Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 344), Reducer 14 (PARTITION-LEVEL SORT, 344) - Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 349) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 25), Map 12 (PARTITION-LEVEL SORT, 25) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 344), Reducer 17 (PARTITION-LEVEL SORT, 344) + Reducer 15 <- Reducer 14 (GROUP PARTITION-LEVEL SORT, 349) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 25), Map 18 (PARTITION-LEVEL SORT, 25) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 5 (PARTITION-LEVEL SORT, 697) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 656), Reducer 2 (PARTITION-LEVEL SORT, 656) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 656), Reducer 9 (PARTITION-LEVEL SORT, 656) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25) - Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 344), Reducer 7 (PARTITION-LEVEL SORT, 344) - Reducer 9 <- Reducer 8 (GROUP, 349) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 344), Reducer 11 (PARTITION-LEVEL SORT, 344) + Reducer 8 <- Reducer 7 (GROUP, 349) + Reducer 9 <- Reducer 15 (PARTITION-LEVEL SORT, 262), Reducer 8 (PARTITION-LEVEL SORT, 262) #### A masked pattern was here #### Vertices: Map 1 @@ -109,7 +109,28 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Execution mode: vectorized - Map 11 + Map 10 + Map Operator Tree: + TableScan + alias: catalog_returns + filterExpr: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 12 Map Operator Tree: TableScan alias: date_dim @@ -129,7 +150,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 12 + Map 13 Map Operator Tree: TableScan alias: customer_address @@ -150,7 +171,7 @@ STAGE PLANS: Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 13 + Map 16 Map Operator Tree: TableScan alias: catalog_returns @@ -171,7 +192,7 @@ STAGE PLANS: Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 17 + Map 18 Map Operator Tree: TableScan alias: date_dim @@ -191,27 +212,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 18 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Map 5 Map Operator Tree: TableScan @@ -236,49 +236,25 @@ STAGE PLANS: Map 6 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_returned_date_sk is not null and cr_returning_addr_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: (ca_address_sk is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_returning_addr_sk (type: int), cr_return_amt_inc_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 > _col3) (type: boolean) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) - Reducer 14 + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -295,19 +271,19 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 15 + Reducer 14 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col6 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col3, _col5 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col6 (type: string), _col1 (type: int) + aggregations: sum(_col5) + keys: _col1 (type: string), _col3 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -319,7 +295,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 16 + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -352,6 +328,23 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(24,7)) + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -417,29 +410,12 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: decimal(7,2)) - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col6 + 1 _col2 (type: int) + outputColumnNames: _col1, _col3, _col5 Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col6 (type: string), _col1 (type: int) + aggregations: sum(_col5) + keys: _col1 (type: string), _col3 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -451,7 +427,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)) - Reducer 9 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -474,6 +450,30 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 > _col3) (type: boolean) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8066666 Data size: 8186697393 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out index d8978b95e2..bfd0cc0f89 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out @@ -150,50 +150,72 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 9 (GROUP, 2) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 2), Reducer 21 (PARTITION-LEVEL SORT, 2) - Reducer 16 <- Map 23 (PARTITION-LEVEL SORT, 36), Reducer 15 (PARTITION-LEVEL SORT, 36) - Reducer 17 <- Map 24 (PARTITION-LEVEL SORT, 44), Reducer 16 (PARTITION-LEVEL SORT, 44) - Reducer 18 <- Reducer 17 (GROUP, 43) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 2), Map 22 (PARTITION-LEVEL SORT, 2) - Reducer 21 <- Reducer 20 (GROUP, 2) - Reducer 26 <- Map 25 (PARTITION-LEVEL SORT, 2), Reducer 32 (PARTITION-LEVEL SORT, 2) - Reducer 27 <- Map 34 (PARTITION-LEVEL SORT, 12), Reducer 26 (PARTITION-LEVEL SORT, 12) - Reducer 28 <- Map 35 (PARTITION-LEVEL SORT, 17), Reducer 27 (PARTITION-LEVEL SORT, 17) - Reducer 29 <- Reducer 28 (GROUP, 13) - Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 25), Reducer 2 (PARTITION-LEVEL SORT, 25) - Reducer 31 <- Map 30 (PARTITION-LEVEL SORT, 2), Map 33 (PARTITION-LEVEL SORT, 2) - Reducer 32 <- Reducer 31 (GROUP, 2) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 32), Reducer 3 (PARTITION-LEVEL SORT, 32) - Reducer 5 <- Reducer 4 (GROUP, 29) - Reducer 6 <- Reducer 18 (PARTITION-LEVEL SORT, 42), Reducer 29 (PARTITION-LEVEL SORT, 42), Reducer 5 (PARTITION-LEVEL SORT, 42) - Reducer 7 <- Reducer 6 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Reducer 11 (GROUP, 2) + Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 32), Reducer 19 (PARTITION-LEVEL SORT, 32) + Reducer 16 <- Reducer 15 (GROUP, 29) + Reducer 17 <- Reducer 16 (PARTITION-LEVEL SORT, 36), Reducer 28 (PARTITION-LEVEL SORT, 36) + Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 25), Reducer 21 (PARTITION-LEVEL SORT, 25) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 17), Reducer 7 (PARTITION-LEVEL SORT, 17) + Reducer 21 <- Map 20 (PARTITION-LEVEL SORT, 2), Reducer 24 (PARTITION-LEVEL SORT, 2) + Reducer 23 <- Map 22 (PARTITION-LEVEL SORT, 2), Map 25 (PARTITION-LEVEL SORT, 2) + Reducer 24 <- Reducer 23 (GROUP, 2) + Reducer 27 <- Map 26 (PARTITION-LEVEL SORT, 44), Reducer 30 (PARTITION-LEVEL SORT, 44) + Reducer 28 <- Reducer 27 (GROUP, 43) + Reducer 3 <- Reducer 2 (GROUP, 13) + Reducer 30 <- Map 29 (PARTITION-LEVEL SORT, 36), Reducer 32 (PARTITION-LEVEL SORT, 36) + Reducer 32 <- Map 31 (PARTITION-LEVEL SORT, 2), Reducer 35 (PARTITION-LEVEL SORT, 2) + Reducer 34 <- Map 33 (PARTITION-LEVEL SORT, 2), Map 36 (PARTITION-LEVEL SORT, 2) + Reducer 35 <- Reducer 34 (GROUP, 2) + Reducer 4 <- Reducer 17 (PARTITION-LEVEL SORT, 30), Reducer 3 (PARTITION-LEVEL SORT, 30) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 12), Reducer 9 (PARTITION-LEVEL SORT, 12) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date_sk is not null and d_date is not null) (type: boolean) + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_date is not null) (type: boolean) + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: d_date (type: string), d_week_seq (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col0 (type: string) Execution mode: vectorized - Map 11 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -219,28 +241,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: catalog_returns - filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Execution mode: vectorized - Map 13 + Map 14 Map Operator Tree: TableScan alias: item @@ -261,7 +262,28 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 14 + Map 18 + Map Operator Tree: + TableScan + alias: catalog_returns + filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 20 Map Operator Tree: TableScan alias: date_dim @@ -282,7 +304,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 19 + Map 22 Map Operator Tree: TableScan alias: date_dim @@ -303,7 +325,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 22 + Map 25 Map Operator Tree: TableScan alias: date_dim @@ -329,28 +351,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 23 - Map Operator Tree: - TableScan - alias: store_returns - filterExpr: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Execution mode: vectorized - Map 24 + Map 26 Map Operator Tree: TableScan alias: item @@ -371,7 +372,28 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 25 + Map 29 + Map Operator Tree: + TableScan + alias: store_returns + filterExpr: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sr_item_sk is not null and sr_returned_date_sk is not null) (type: boolean) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_return_quantity (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 31 Map Operator Tree: TableScan alias: date_dim @@ -392,7 +414,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 30 + Map 33 Map Operator Tree: TableScan alias: date_dim @@ -413,7 +435,7 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized - Map 33 + Map 36 Map Operator Tree: TableScan alias: date_dim @@ -439,7 +461,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 34 + Map 6 Map Operator Tree: TableScan alias: web_returns @@ -460,49 +482,50 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 35 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_item_sk is not null and i_item_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (i_item_sk is not null and i_item_id is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Map 8 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + filterExpr: (d_date_sk is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + predicate: (d_date_sk is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date (type: string), d_week_seq (type: int) + expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col1 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col0 (type: int) Execution mode: vectorized - Reducer 10 + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -522,17 +545,62 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + keys: _col1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 16 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: double) + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 38326912 Data size: 2969590853 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38326912 Data size: 2969590853 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: double), _col4 (type: bigint), _col5 (type: double) + Reducer 19 Reduce Operator Tree: Join Operator condition map: @@ -540,60 +608,40 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int) - Reducer 17 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 18 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 2 + Reducer 21 Reduce Operator Tree: Join Operator condition map: @@ -609,7 +657,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 20 + Reducer 23 Reduce Operator Tree: Join Operator condition map: @@ -631,7 +679,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 21 + Reducer 24 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -645,22 +693,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 40176 Data size: 44957448 Basic stats: COMPLETE Column stats: NONE - Reducer 26 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Reducer 27 Reduce Operator Tree: Join Operator @@ -668,41 +700,44 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int) - Reducer 28 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: int) + outputColumnNames: _col1, _col4 + Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) + aggregations: sum(_col4) + keys: _col1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 17422145 Data size: 1603485031 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69685294 Data size: 5399255980 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 29 + Reducer 28 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 34842647 Data size: 2699627990 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: double) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -722,7 +757,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8711072 Data size: 801742469 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 3 + Reducer 30 Reduce Operator Tree: Join Operator condition map: @@ -730,16 +765,32 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 31678769 Data size: 3362958220 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int) - Reducer 31 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 32 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE + Reducer 34 Reduce Operator Tree: Join Operator condition map: @@ -761,7 +812,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reducer 32 + Reducer 35 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -780,75 +831,29 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col5, _col7 - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5) - keys: _col7 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 5 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 17423323 Data size: 1849627061 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7, _col8 - Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col8 + Statistics: Num rows: 42159604 Data size: 3266550009 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col4 (type: bigint), (((_col5 / UDFToDouble(((_col4 + _col1) + _col7))) / 3.0D) * 100.0D) (type: double), _col1 (type: bigint), (((_col2 / UDFToDouble(((_col4 + _col1) + _col7))) / 3.0D) * 100.0D) (type: double), _col7 (type: bigint), (((_col8 / UDFToDouble(((_col4 + _col1) + _col7))) / 3.0D) * 100.0D) (type: double), (CAST( ((_col4 + _col1) + _col7) AS decimal(19,0)) / 3) (type: decimal(25,6)) + expressions: _col3 (type: string), _col7 (type: bigint), (((_col8 / UDFToDouble(((_col7 + _col4) + _col1))) / 3.0D) * 100.0D) (type: double), _col4 (type: bigint), (((_col5 / UDFToDouble(((_col7 + _col4) + _col1))) / 3.0D) * 100.0D) (type: double), _col1 (type: bigint), (((_col2 / UDFToDouble(((_col7 + _col4) + _col1))) / 3.0D) * 100.0D) (type: double), (CAST( ((_col7 + _col4) + _col1) AS decimal(19,0)) / 3) (type: decimal(25,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42159604 Data size: 3266550009 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) null sort order: zz sort order: ++ - Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42159604 Data size: 3266550009 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 7 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: double), VALUE._col1 (type: bigint), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: decimal(25,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 76653825 Data size: 5939181706 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42159604 Data size: 3266550009 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 7700 Basic stats: COMPLETE Column stats: NONE @@ -859,28 +864,39 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 7 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 15838314 Data size: 1457713633 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 80353 Data size: 89916016 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index afe1c60089..3a6b60d716 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -191,7 +191,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 1 Map Operator Tree: TableScan alias: web_page @@ -206,8 +206,8 @@ STAGE PLANS: Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -216,7 +216,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 8 Map Operator Tree: TableScan alias: reason @@ -231,8 +231,8 @@ STAGE PLANS: Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -240,37 +240,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 198), Reducer 9 (PARTITION-LEVEL SORT, 198) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 6 (PARTITION-LEVEL SORT, 154) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 401), Reducer 2 (PARTITION-LEVEL SORT, 401) - Reducer 4 <- Reducer 3 (GROUP, 58) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 169), Map 7 (PARTITION-LEVEL SORT, 169) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 181), Reducer 8 (PARTITION-LEVEL SORT, 181) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 169), Map 9 (PARTITION-LEVEL SORT, 169) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 181), Reducer 10 (PARTITION-LEVEL SORT, 181) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 198), Reducer 11 (PARTITION-LEVEL SORT, 198) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 401), Reducer 3 (PARTITION-LEVEL SORT, 401) + Reducer 5 <- Reducer 4 (GROUP, 58) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: web_sales - filterExpr: (ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250))) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250))) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_profit BETWEEN 100 AND 200 (type: boolean), ws_net_profit BETWEEN 150 AND 300 (type: boolean), ws_net_profit BETWEEN 50 AND 250 (type: boolean), ws_sales_price BETWEEN 100 AND 150 (type: boolean), ws_sales_price BETWEEN 50 AND 100 (type: boolean), ws_sales_price BETWEEN 150 AND 200 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) - Execution mode: vectorized - Map 11 + Map 13 Map Operator Tree: TableScan alias: customer_address @@ -291,7 +270,7 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized - Map 12 + Map 14 Map Operator Tree: TableScan alias: cd2 @@ -312,7 +291,7 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: cd1 @@ -333,7 +312,28 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized - Map 6 + Map 2 + Map Operator Tree: + TableScan + alias: web_sales + filterExpr: (ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250))) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250))) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_profit BETWEEN 100 AND 200 (type: boolean), ws_net_profit BETWEEN 150 AND 300 (type: boolean), ws_net_profit BETWEEN 50 AND 250 (type: boolean), ws_sales_price BETWEEN 100 AND 150 (type: boolean), ws_sales_price BETWEEN 50 AND 100 (type: boolean), ws_sales_price BETWEEN 150 AND 200 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -353,7 +353,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: web_returns @@ -375,6 +375,40 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col13, _col14 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string), _col14 (type: string), _col1 (type: int) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col13 (type: string), _col14 (type: string), _col1 (type: int) + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) + Reducer 12 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -390,24 +424,20 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col5, _col6, _col7, _col9, _col10, _col11, _col18, _col19, _col20, _col21, _col22, _col23, _col25 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col2, _col7, _col8, _col9, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25 input vertices: - 1 Map 14 + 0 Map 8 Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col25 (type: string), _col0 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean) - outputColumnNames: _col1, _col2, _col7, _col8, _col9, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25 + Reduce Output Operator + key expressions: _col2 (type: int), _col7 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col7 (type: int) Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col7 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col7 (type: int) - Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean), _col25 (type: boolean) - Reducer 2 + value expressions: _col1 (type: string), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col24 (type: boolean), _col25 (type: boolean) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -424,7 +454,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int), _col3 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) - Reducer 3 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -439,31 +469,35 @@ STAGE PLANS: Filter Operator predicate: (((_col23 and _col5) or (_col24 and _col6) or (_col25 and _col7)) and ((_col32 and _col33 and _col8) or (_col34 and _col35 and _col9) or (_col36 and _col37 and _col10))) (type: boolean) Statistics: Num rows: 49005909 Data size: 6663386377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col13, _col20, _col21 - input vertices: - 1 Map 15 - Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col4), count(_col4), sum(_col21), count(_col21), sum(_col20), count(_col20) - keys: _col13 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Operator + expressions: _col2 (type: int), _col4 (type: int), _col13 (type: string), _col20 (type: decimal(7,2)), _col21 (type: decimal(7,2)) + outputColumnNames: _col2, _col4, _col13, _col20, _col21 + Statistics: Num rows: 49005909 Data size: 6663386377 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col5, _col14, _col21, _col22 + input vertices: + 0 Map 1 Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col5), count(_col5), sum(_col22), count(_col22), sum(_col21), count(_col21) + keys: _col14 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -482,7 +516,7 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 26953250 Data size: 3664862518 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -499,40 +533,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 - Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col13, _col14 - Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string), _col14 (type: string), _col1 (type: int) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col13 (type: string), _col14 (type: string), _col1 (type: int) - Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query86.q.out b/ql/src/test/results/clientpositive/perf/spark/query86.q.out index 96228f6cf7..673d8c7e38 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query86.q.out @@ -64,14 +64,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 174), Reducer 2 (PARTITION-LEVEL SORT, 174) - Reducer 4 <- Reducer 3 (GROUP, 556) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 278) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 174), Reducer 7 (PARTITION-LEVEL SORT, 174) + Reducer 3 <- Reducer 2 (GROUP, 556) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 278) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 154), Map 8 (PARTITION-LEVEL SORT, 154) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: web_sales @@ -92,7 +113,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: d1 @@ -112,27 +133,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: i_item_sk is not null (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_class (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -140,28 +140,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col5 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: string), _col5 (type: string), _col2 (type: decimal(7,2)) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -178,7 +161,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 522729705 Data size: 71076122589 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(17,2)) - Reducer 4 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -198,7 +181,7 @@ STAGE PLANS: Map-reduce partition columns: (grouping(_col3, 1L) + grouping(_col3, 0L)) (type: bigint), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END (type: string) Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -237,7 +220,7 @@ STAGE PLANS: Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -254,6 +237,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query87.q.out b/ql/src/test/results/clientpositive/perf/spark/query87.q.out index 514f446121..e1911d4048 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query87.q.out @@ -60,42 +60,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 14 (PARTITION-LEVEL SORT, 306) - Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 873), Reducer 11 (PARTITION-LEVEL SORT, 873) - Reducer 13 <- Reducer 12 (GROUP PARTITION-LEVEL SORT, 369) - Reducer 17 <- Map 14 (PARTITION-LEVEL SORT, 154), Map 16 (PARTITION-LEVEL SORT, 154) - Reducer 18 <- Map 15 (PARTITION-LEVEL SORT, 706), Reducer 17 (PARTITION-LEVEL SORT, 706) - Reducer 19 <- Reducer 18 (GROUP PARTITION-LEVEL SORT, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Map 15 (PARTITION-LEVEL SORT, 975), Reducer 2 (PARTITION-LEVEL SORT, 975) - Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 481) - Reducer 5 <- Reducer 13 (GROUP PARTITION-LEVEL SORT, 213), Reducer 4 (GROUP PARTITION-LEVEL SORT, 213) - Reducer 6 <- Reducer 19 (GROUP, 56), Reducer 5 (GROUP, 56) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 11 <- Map 1 (PARTITION-LEVEL SORT, 873), Reducer 14 (PARTITION-LEVEL SORT, 873) + Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 369) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 15 (PARTITION-LEVEL SORT, 306) + Reducer 17 <- Map 1 (PARTITION-LEVEL SORT, 706), Reducer 20 (PARTITION-LEVEL SORT, 706) + Reducer 18 <- Reducer 17 (GROUP PARTITION-LEVEL SORT, 186) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Reducer 8 (PARTITION-LEVEL SORT, 975) + Reducer 20 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 4 <- Reducer 12 (GROUP PARTITION-LEVEL SORT, 213), Reducer 3 (GROUP PARTITION-LEVEL SORT, 213) + Reducer 5 <- Reducer 18 (GROUP, 56), Reducer 4 (GROUP, 56) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 8 <- Map 15 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: c_customer_sk is not null (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 10 + Map 13 Map Operator Tree: TableScan alias: catalog_sales @@ -116,7 +116,7 @@ STAGE PLANS: Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 14 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -137,46 +137,46 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 15 + Map 19 Map Operator Tree: TableScan - alias: customer - filterExpr: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: c_customer_sk is not null (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_first_name (type: string), c_last_name (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Map 16 + Map 7 Map Operator Tree: TableScan - alias: web_sales - filterExpr: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized Reducer 11 @@ -186,28 +186,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 12 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: string), _col5 (type: string), _col3 (type: string) + keys: _col2 (type: string), _col1 (type: string), _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -218,7 +201,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -258,7 +241,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 17 + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -267,26 +250,26 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) - Reducer 18 + Reducer 17 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: string), _col5 (type: string), _col3 (type: string) + keys: _col2 (type: string), _col1 (type: string), _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -297,7 +280,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reducer 19 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -344,28 +327,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6 + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col6 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col6 (type: string), _col5 (type: string), _col3 (type: string) + keys: _col2 (type: string), _col1 (type: string), _col6 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -376,7 +342,24 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Reducer 20 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -416,7 +399,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -460,7 +443,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -489,7 +472,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -504,6 +487,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query88.q.out b/ql/src/test/results/clientpositive/perf/spark/query88.q.out index 9ed159993a..a5013199ab 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query88.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query88.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[251][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[259][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from @@ -212,6 +212,26 @@ STAGE PLANS: #### A masked pattern was here #### Vertices: Map 10 + Map Operator Tree: + TableScan + alias: time_dim + filterExpr: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 6 Map Operator Tree: TableScan alias: store @@ -226,12 +246,12 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 8 + Map 9 Map Operator Tree: TableScan alias: household_demographics @@ -251,40 +271,20 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 - Map Operator Tree: - TableScan - alias: time_dim - filterExpr: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t_time_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 14400 Data size: 6782400 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-2 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 1) - Reducer 17 <- Map 16 (GROUP, 1) - Reducer 22 <- Map 21 (GROUP, 1) - Reducer 27 <- Map 26 (GROUP, 1) - Reducer 32 <- Map 31 (GROUP, 1) - Reducer 37 <- Map 36 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 13 <- Map 12 (GROUP, 1) + Reducer 18 <- Map 17 (GROUP, 1) + Reducer 23 <- Map 22 (GROUP, 1) + Reducer 28 <- Map 27 (GROUP, 1) + Reducer 33 <- Map 32 (GROUP, 1) + Reducer 38 <- Map 37 (GROUP, 1) + Reducer 8 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 12 Map Operator Tree: TableScan alias: store_sales @@ -305,7 +305,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 14 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -315,16 +315,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 14 + 1 Map 15 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 15 + 0 Map 11 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -340,7 +340,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 16 + Map 17 Map Operator Tree: TableScan alias: store_sales @@ -361,7 +361,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 18 + 1 Map 19 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -371,16 +371,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 19 + 1 Map 20 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 20 + 0 Map 16 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -396,7 +396,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 21 + Map 22 Map Operator Tree: TableScan alias: store_sales @@ -417,7 +417,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 23 + 1 Map 24 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -427,16 +427,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 24 + 1 Map 25 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 25 + 0 Map 21 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -452,7 +452,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 26 + Map 27 Map Operator Tree: TableScan alias: store_sales @@ -473,7 +473,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 28 + 1 Map 29 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -483,16 +483,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 29 + 1 Map 30 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 30 + 0 Map 26 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -508,7 +508,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 31 + Map 32 Map Operator Tree: TableScan alias: store_sales @@ -529,7 +529,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 33 + 1 Map 34 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -539,16 +539,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 34 + 1 Map 35 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 35 + 0 Map 31 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -564,7 +564,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 36 + Map 37 Map Operator Tree: TableScan alias: store_sales @@ -585,7 +585,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 38 + 1 Map 39 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -595,16 +595,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 39 + 1 Map 40 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 40 + 0 Map 36 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -620,7 +620,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -641,7 +641,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -651,16 +651,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 9 + 1 Map 10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 10 + 0 Map 6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -676,7 +676,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 12 + Reducer 13 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -696,7 +696,7 @@ STAGE PLANS: 5 6 7 - Reducer 17 + Reducer 18 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -716,7 +716,7 @@ STAGE PLANS: 5 6 7 - Reducer 22 + Reducer 23 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -736,7 +736,7 @@ STAGE PLANS: 5 6 7 - Reducer 27 + Reducer 28 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -756,7 +756,7 @@ STAGE PLANS: 5 6 7 - Reducer 32 + Reducer 33 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -776,7 +776,7 @@ STAGE PLANS: 5 6 7 - Reducer 37 + Reducer 38 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -796,7 +796,7 @@ STAGE PLANS: 5 6 7 - Reducer 7 + Reducer 8 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -821,7 +821,27 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: store + filterExpr: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: household_demographics @@ -841,7 +861,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan alias: time_dim @@ -861,34 +881,14 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 5 - Map Operator Tree: - TableScan - alias: store - filterExpr: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: store_sales @@ -909,7 +909,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -919,16 +919,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 5 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -944,7 +944,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -974,13 +974,13 @@ STAGE PLANS: 7 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: - 1 Reducer 7 - 2 Reducer 12 - 3 Reducer 17 - 4 Reducer 22 - 5 Reducer 27 - 6 Reducer 32 - 7 Reducer 37 + 1 Reducer 8 + 2 Reducer 13 + 3 Reducer 18 + 4 Reducer 23 + 5 Reducer 28 + 6 Reducer 33 + 7 Reducer 38 Statistics: Num rows: 1 Data size: 65 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint), _col7 (type: bigint), _col6 (type: bigint), _col5 (type: bigint), _col4 (type: bigint), _col3 (type: bigint), _col2 (type: bigint), _col1 (type: bigint) @@ -998,7 +998,27 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 11 + Map Operator Tree: + TableScan + alias: store + filterExpr: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 14 Map Operator Tree: TableScan alias: household_demographics @@ -1018,7 +1038,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 14 + Map 15 Map Operator Tree: TableScan alias: time_dim @@ -1038,7 +1058,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 15 + + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 16 Map Operator Tree: TableScan alias: store @@ -1053,17 +1078,12 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: - Map 18 + Map 19 Map Operator Tree: TableScan alias: household_demographics @@ -1083,7 +1103,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 19 + Map 20 Map Operator Tree: TableScan alias: time_dim @@ -1103,7 +1123,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 20 + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 21 Map Operator Tree: TableScan alias: store @@ -1118,17 +1143,12 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-6 - Spark -#### A masked pattern was here #### - Vertices: - Map 23 + Map 24 Map Operator Tree: TableScan alias: household_demographics @@ -1148,7 +1168,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 24 + Map 25 Map Operator Tree: TableScan alias: time_dim @@ -1168,7 +1188,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 25 + + Stage: Stage-7 + Spark +#### A masked pattern was here #### + Vertices: + Map 26 Map Operator Tree: TableScan alias: store @@ -1183,17 +1208,12 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 28 + Map 29 Map Operator Tree: TableScan alias: household_demographics @@ -1213,7 +1233,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 29 + Map 30 Map Operator Tree: TableScan alias: time_dim @@ -1233,7 +1253,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 30 + + Stage: Stage-8 + Spark +#### A masked pattern was here #### + Vertices: + Map 31 Map Operator Tree: TableScan alias: store @@ -1248,17 +1273,12 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-8 - Spark -#### A masked pattern was here #### - Vertices: - Map 33 + Map 34 Map Operator Tree: TableScan alias: household_demographics @@ -1278,7 +1298,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 34 + Map 35 Map Operator Tree: TableScan alias: time_dim @@ -1298,7 +1318,12 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 35 + + Stage: Stage-9 + Spark +#### A masked pattern was here #### + Vertices: + Map 36 Map Operator Tree: TableScan alias: store @@ -1313,17 +1338,12 @@ STAGE PLANS: Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-9 - Spark -#### A masked pattern was here #### - Vertices: - Map 38 + Map 39 Map Operator Tree: TableScan alias: household_demographics @@ -1343,7 +1363,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 39 + Map 40 Map Operator Tree: TableScan alias: time_dim @@ -1363,26 +1383,6 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 40 - Map Operator Tree: - TableScan - alias: store - filterExpr: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query89.q.out b/ql/src/test/results/clientpositive/perf/spark/query89.q.out index 70e2d15a5a..10a0e861bc 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query89.q.out @@ -72,7 +72,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 1 Map Operator Tree: TableScan alias: store @@ -87,8 +87,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col4 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -96,14 +96,35 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 7 (PARTITION-LEVEL SORT, 403) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 438), Reducer 8 (PARTITION-LEVEL SORT, 438) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 403), Map 9 (PARTITION-LEVEL SORT, 403) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_moy (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: store_sales @@ -124,7 +145,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 7 + Map 9 Map Operator Tree: TableScan alias: item @@ -145,44 +166,6 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_moy (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string) Reducer 3 Local Work: Map Reduce Local Work @@ -193,21 +176,21 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col9 + outputColumnNames: _col1, _col4, _col5, _col7, _col8, _col9 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col9, _col11, _col12 + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col1, _col2, _col4, _col8, _col10, _col11, _col12 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col11 (type: string), _col12 (type: string), _col9 (type: int), _col5 (type: string), _col6 (type: string), _col7 (type: string) + aggregations: sum(_col8) + keys: _col1 (type: string), _col2 (type: string), _col4 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -297,6 +280,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query90.q.out b/ql/src/test/results/clientpositive/perf/spark/query90.q.out index 818e0b0233..e3fc620677 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query90.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[65][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[67][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio from ( select count(*) amc @@ -63,7 +63,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 11 + Map 12 Map Operator Tree: TableScan alias: web_page @@ -83,7 +83,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 12 + Map 7 Map Operator Tree: TableScan alias: household_demographics @@ -98,8 +98,8 @@ STAGE PLANS: Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -107,11 +107,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) - Reducer 9 <- Reducer 8 (GROUP, 1) + Reducer 10 <- Reducer 9 (GROUP, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 154), Map 8 (PARTITION-LEVEL SORT, 154) #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: time_dim @@ -131,7 +131,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan alias: web_sales @@ -152,7 +152,21 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Reducer 8 + Reducer 10 + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + Reducer 9 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -172,16 +186,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 11 + 1 Map 12 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) input vertices: - 1 Map 12 + 0 Map 7 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -194,61 +208,47 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 9 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 6 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: wp_web_page_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -257,11 +257,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 4 (PARTITION-LEVEL SORT, 154) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 154), Map 5 (PARTITION-LEVEL SORT, 154) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: web_sales @@ -282,7 +282,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: time_dim @@ -302,7 +302,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -322,16 +322,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) input vertices: - 1 Map 6 + 0 Map 1 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -344,7 +344,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -362,7 +362,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 9 + 1 Reducer 10 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) (type: decimal(35,20)) diff --git a/ql/src/test/results/clientpositive/perf/spark/query91.q.out b/ql/src/test/results/clientpositive/perf/spark/query91.q.out index cc1573a173..9e87dfad80 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query91.q.out @@ -85,7 +85,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 1 Map Operator Tree: TableScan alias: call_center @@ -100,8 +100,8 @@ STAGE PLANS: Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -110,7 +110,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 8 Map Operator Tree: TableScan alias: household_demographics @@ -125,8 +125,8 @@ STAGE PLANS: Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -134,36 +134,36 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 750), Reducer 9 (PARTITION-LEVEL SORT, 750) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 745), Reducer 2 (PARTITION-LEVEL SORT, 745) - Reducer 4 <- Reducer 3 (GROUP, 787) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 541), Map 8 (PARTITION-LEVEL SORT, 541) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 750), Reducer 12 (PARTITION-LEVEL SORT, 750) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 541), Map 13 (PARTITION-LEVEL SORT, 541) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 25), Map 7 (PARTITION-LEVEL SORT, 25) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 745), Reducer 3 (PARTITION-LEVEL SORT, 745) + Reducer 5 <- Reducer 4 (GROUP, 787) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 11 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Map 11 + Map 13 Map Operator Tree: TableScan alias: customer_demographics @@ -184,27 +184,28 @@ STAGE PLANS: Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 12 + Map 2 Map Operator Tree: TableScan - alias: customer_address - filterExpr: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ca_address_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -224,26 +225,25 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + alias: customer_address + filterExpr: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_gmt_offset = -7) and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + expressions: ca_address_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 10 Local Work: @@ -253,32 +253,45 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col5, _col6 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col1, _col3, _col6, _col7 Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col5, _col6 + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col2, _col7, _col8 input vertices: - 1 Map 13 + 0 Map 8 Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col2, _col7, _col8 + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: string), _col8 (type: string) - Reducer 2 + value expressions: _col7 (type: string), _col8 (type: string) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string) + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -294,32 +307,32 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col6, _col7, _col8 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col5, _col7 input vertices: - 1 Map 7 + 0 Map 1 Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col5 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col6 (type: string), _col7 (type: string), _col8 (type: string) - Reducer 3 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: decimal(7,2)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col5 (type: int) 1 _col2 (type: int) - outputColumnNames: _col3, _col6, _col7, _col8, _col16, _col17 + outputColumnNames: _col1, _col2, _col3, _col7, _col16, _col17 Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col3) - keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col16 (type: string), _col17 (type: string) + aggregations: sum(_col7) + keys: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col16 (type: string), _col17 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -331,7 +344,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -350,7 +363,7 @@ STAGE PLANS: sort order: - Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -364,23 +377,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5, _col6 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query93.q.out b/ql/src/test/results/clientpositive/perf/spark/query93.q.out index 09441d24e7..ad1b9de16d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query93.q.out @@ -50,7 +50,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 6 Map Operator Tree: TableScan alias: reason @@ -74,12 +74,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 436), Map 6 (PARTITION-LEVEL SORT, 436) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 436), Map 5 (PARTITION-LEVEL SORT, 436) Reducer 3 <- Reducer 2 (GROUP, 437) Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(18,2)) + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan alias: store_returns @@ -100,7 +121,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2, _col3, _col4 input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) @@ -112,27 +133,6 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 6 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_item_sk is not null and ss_ticket_number is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_item_sk (type: int), ss_customer_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col2 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(18,2)) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator @@ -141,10 +141,10 @@ STAGE PLANS: keys: 0 _col0 (type: int), _col2 (type: int) 1 _col0 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col4, _col7, _col9, _col10, _col11 + outputColumnNames: _col1, _col3, _col4, _col5, _col9, _col10 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col7 (type: int), CASE WHEN (_col4) THEN ((CAST( (_col9 - _col3) AS decimal(10,0)) * _col10)) ELSE (_col11) END (type: decimal(18,2)) + expressions: _col1 (type: int), CASE WHEN (_col10) THEN ((CAST( (_col3 - _col9) AS decimal(10,0)) * _col4)) ELSE (_col5) END (type: decimal(18,2)) outputColumnNames: _col0, _col1 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out index 290e323523..f5d0e52e4a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out @@ -76,42 +76,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: web_site + filterExpr: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: web_site_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work Map 9 Map Operator Tree: TableScan - alias: web_site - filterExpr: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) - Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: web_site_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -121,34 +121,13 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (GROUP, 11) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 312), Map 7 (PARTITION-LEVEL SORT, 312) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 357), Reducer 2 (PARTITION-LEVEL SORT, 357) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 230), Reducer 3 (PARTITION-LEVEL SORT, 230) - Reducer 5 <- Reducer 4 (GROUP, 124) - Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 312), Map 8 (PARTITION-LEVEL SORT, 312) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 357), Reducer 3 (PARTITION-LEVEL SORT, 357) + Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 230), Reducer 4 (PARTITION-LEVEL SORT, 230) + Reducer 6 <- Reducer 5 (GROUP, 124) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: ws1 - filterExpr: (ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_order_number is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_order_number is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Execution mode: vectorized Map 10 Map Operator Tree: TableScan @@ -198,7 +177,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 2 + Map Operator Tree: + TableScan + alias: ws1 + filterExpr: (ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_order_number is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_order_number is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -237,7 +237,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 7199233 Data size: 662597045 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: boolean) - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -257,26 +257,30 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 8 + 1 Map 9 Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col5, _col6, _col7, _col8 input vertices: - 1 Map 9 + 0 Map 1 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col4 (type: int) + Select Operator + expressions: _col5 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) + outputColumnNames: _col3, _col4, _col5, _col6 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Reducer 3 + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -298,7 +302,7 @@ STAGE PLANS: Map-reduce partition columns: _col4 (type: int) Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - Reducer 4 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -329,7 +333,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -348,7 +352,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query95.q.out b/ql/src/test/results/clientpositive/perf/spark/query95.q.out index db263fb8c9..a205dd4979 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query95.q.out @@ -82,42 +82,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 15 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: web_site + filterExpr: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) + Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) + expressions: web_site_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) - 1 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 8 + Map 19 Map Operator Tree: TableScan - alias: web_site - filterExpr: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) - Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (web_site_sk is not null and (web_company_name = 'pri')) (type: boolean) - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + predicate: (d_date_sk is not null and CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00') (type: boolean) + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: web_site_sk (type: int) + expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 77704 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -126,42 +126,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) - Reducer 11 <- Reducer 10 (GROUP, 169) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 17 (PARTITION-LEVEL SORT, 306) - Reducer 15 <- Map 18 (PARTITION-LEVEL SORT, 179), Reducer 14 (PARTITION-LEVEL SORT, 179) - Reducer 16 <- Reducer 15 (GROUP, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 312), Map 6 (PARTITION-LEVEL SORT, 312) - Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 381), Reducer 16 (PARTITION-LEVEL SORT, 381), Reducer 2 (PARTITION-LEVEL SORT, 381) - Reducer 4 <- Reducer 3 (GROUP, 448) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 14 (PARTITION-LEVEL SORT, 306) + Reducer 12 <- Reducer 11 (GROUP, 169) + Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 288), Reducer 17 (PARTITION-LEVEL SORT, 288) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 312), Map 18 (PARTITION-LEVEL SORT, 312) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 179), Reducer 2 (PARTITION-LEVEL SORT, 179) + Reducer 4 <- Reducer 3 (GROUP, 186) + Reducer 5 <- Reducer 13 (PARTITION-LEVEL SORT, 317), Reducer 4 (PARTITION-LEVEL SORT, 317) + Reducer 6 <- Reducer 5 (GROUP, 247) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: ws1 - filterExpr: (ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) - Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: ws2 filterExpr: ws_order_number is not null (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -179,7 +159,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 13 + Map 10 Map Operator Tree: TableScan alias: ws1 @@ -200,7 +180,7 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 17 + Map 14 Map Operator Tree: TableScan alias: ws2 @@ -221,27 +201,28 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 18 + Map 16 Map Operator Tree: TableScan - alias: web_returns - filterExpr: wr_order_number is not null (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + alias: ws1 + filterExpr: (ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: wr_order_number is not null (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_order_number is not null and ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_order_number (type: int) - outputColumnNames: _col13 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col13 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col13 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized - Map 6 + Map 18 Map Operator Tree: TableScan alias: customer_address @@ -261,10 +242,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan - alias: ws1 + alias: ws2 filterExpr: ws_order_number is not null (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -282,7 +263,27 @@ STAGE PLANS: Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Reducer 10 + Map 9 + Map Operator Tree: + TableScan + alias: web_returns + filterExpr: wr_order_number is not null (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: wr_order_number is not null (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wr_order_number (type: int) + outputColumnNames: _col13 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col13 (type: int) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -311,7 +312,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -325,7 +326,63 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Reducer 14 + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col4 (type: int) + outputColumnNames: _col5, _col6, _col7 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col5 (type: int) + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Reducer 17 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col3, _col4, _col5 + input vertices: + 1 Map 19 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col4, _col5, _col6 + input vertices: + 0 Map 15 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Reducer 2 Reduce Operator Tree: Join Operator condition map: @@ -348,7 +405,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reducer 15 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -370,7 +427,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -384,72 +441,31 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5 - input vertices: - 1 Map 7 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 - input vertices: - 1 Map 8 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) - Reducer 3 + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 - Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col6, _col7, _col8 + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col4), sum(_col5) - keys: _col3 (type: int) + aggregations: sum(_col7), sum(_col8) + keys: _col6 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) - Reducer 4 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -457,7 +473,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) mode: partial2 @@ -468,7 +484,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 5 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query96.q.out b/ql/src/test/results/clientpositive/perf/spark/query96.q.out index e4bf47884a..c7c4097d24 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query96.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query96.q.out @@ -48,7 +48,27 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: store + filterExpr: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Map 4 Map Operator Tree: TableScan alias: time_dim @@ -68,7 +88,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan alias: household_demographics @@ -88,34 +108,14 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 5 - Map Operator Tree: - TableScan - alias: store - filterExpr: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (s_store_sk is not null and (s_store_name = 'ese')) (type: boolean) - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: store_sales @@ -136,7 +136,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -146,16 +146,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) input vertices: - 1 Map 5 + 0 Map 1 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -171,7 +171,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query99.q.out b/ql/src/test/results/clientpositive/perf/spark/query99.q.out index 39cf2ee29e..964498bd03 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query99.q.out @@ -89,43 +89,43 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 1 Map Operator Tree: TableScan - alias: call_center - filterExpr: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + alias: warehouse + filterExpr: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cc_call_center_sk (type: int), cc_name (type: string) + expressions: w_warehouse_sk (type: int), substr(w_warehouse_name, 1, 20) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col6 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 8 + Map 2 Map Operator Tree: TableScan - alias: warehouse - filterExpr: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + alias: call_center + filterExpr: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: w_warehouse_sk is not null (type: boolean) - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + predicate: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: w_warehouse_sk (type: int), substr(w_warehouse_name, 1, 20) (type: string) + expressions: cc_call_center_sk (type: int), cc_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col2 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -134,7 +134,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 8 Map Operator Tree: TableScan alias: ship_mode @@ -158,12 +158,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 336), Map 6 (PARTITION-LEVEL SORT, 336) - Reducer 3 <- Reducer 2 (GROUP, 447) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 336), Map 7 (PARTITION-LEVEL SORT, 336) + Reducer 5 <- Reducer 4 (GROUP, 447) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: catalog_sales @@ -184,7 +204,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 input vertices: - 1 Map 5 + 1 Map 8 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -196,27 +216,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 6 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 2 + Reducer 4 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -226,31 +226,31 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col11 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col13 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col6, _col7, _col8, _col9, _col10, _col11, _col13 input vertices: - 1 Map 7 + 0 Map 2 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col10, _col13, _col15 + 0 _col0 (type: int) + 1 _col6 (type: int) + outputColumnNames: _col1, _col3, _col9, _col10, _col11, _col12, _col13, _col15 input vertices: - 1 Map 8 + 0 Map 1 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: sum(_col4), sum(_col5), sum(_col6), sum(_col7), sum(_col8) - keys: _col15 (type: string), _col10 (type: string), _col13 (type: string) + aggregations: sum(_col9), sum(_col10), sum(_col11), sum(_col12), sum(_col13) + keys: _col1 (type: string), _col15 (type: string), _col3 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -263,7 +263,7 @@ STAGE PLANS: Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) - Reducer 3 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -283,7 +283,7 @@ STAGE PLANS: Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) - Reducer 4 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out index 2cda493bf7..cb7ee12be1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_ext_query1.q.out @@ -64,20 +64,21 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveFilter(condition=[IS NOT NULL($0)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0], $f0=[$1], $f1=[$2], $f2=[$3]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### @@ -157,20 +158,21 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveFilter(condition=[IS NOT NULL($0)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[{455187.9173657213 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0], $f0=[$1], $f1=[$2], $f2=[$3]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[{455187.9173657213 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out index 04925537c9..0b7094aa4f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query1.q.out @@ -64,20 +64,21 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], $f0=[$1], $f1=[$2], $f2=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[AND(=($24, _UTF-16LE'NM'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out index b6f3727e70..5fd8252f8e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query11.q.out @@ -166,54 +166,59 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f9=[$7]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], -=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$1], $f00=[$2], $f9=[$3], customer_id=[$4], year_total=[$5], CAST=[$6]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], -=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f9=[$7]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], -=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], -=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out index edb893eb6d..727283f813 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query12.q.out @@ -79,11 +79,12 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out index 9fab0cbed0..0d133ce64c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -122,13 +122,14 @@ HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[CAST(/($2, $3)):DECIMAL(11, 6)], HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN9=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN10=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN11=[BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN12=[BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN13=[BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(12, 2), $22), <=($22, 200:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 300:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 250:DECIMAL(12, 2))))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_store_sk=[$4], ss_quantity=[$5], ss_ext_sales_price=[$6], ss_ext_wholesale_cost=[$7], BETWEEN=[$8], BETWEEN9=[$9], BETWEEN10=[$10], BETWEEN11=[$11], BETWEEN12=[$12], BETWEEN13=[$13], d_date_sk=[$14]) + HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN9=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN10=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN11=[BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN12=[BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN13=[BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(12, 2), $22), <=($22, 200:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 300:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 250:DECIMAL(12, 2))))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out index adb4a9bde8..79aedc5a32 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1178][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1185][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1192][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1181][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[1207][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 27' is a cross product PREHOOK: query: explain cbo with cross_items as (select i_item_sk ss_item_sk @@ -236,13 +236,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_quantity=[$2], ss_list_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0]) HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) @@ -333,13 +334,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_quantity=[$2], cs_list_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0]) HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) @@ -430,13 +432,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_quantity=[$2], ws_list_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0]) HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out index 1bfd9d8855..a5d0e6a7fa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query16.q.out @@ -81,13 +81,14 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[su HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[AND(=($8, _UTF-16LE'NY'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$10], cs_call_center_sk=[$11], cs_warehouse_sk=[$14], cs_order_number=[$17], cs_ext_ship_cost=[$28], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($10), IS NOT NULL($17))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_date_sk=[$0], cs_ship_addr_sk=[$1], cs_call_center_sk=[$2], cs_warehouse_sk=[$3], cs_order_number=[$4], cs_ext_ship_cost=[$5], cs_net_profit=[$6], d_date_sk=[$7], d_date=[$8]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$10], cs_call_center_sk=[$11], cs_warehouse_sk=[$14], cs_order_number=[$17], cs_ext_ship_cost=[$28], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($10), IS NOT NULL($17))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) HiveFilter(condition=[AND(IN($25, _UTF-16LE'Ziebach County', _UTF-16LE'Levy County', _UTF-16LE'Huron County', _UTF-16LE'Franklin Parish', _UTF-16LE'Daviess County'), IS NOT NULL($0))]) HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out index c848aea887..5e92930803 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query18.q.out @@ -99,15 +99,16 @@ HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(=($3, _UTF-16LE'College'), =($1, _UTF-16LE'M'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_bill_cdemo_sk=[$2], cs_item_sk=[$3], CAST=[$4], CAST5=[$5], CAST6=[$6], CAST7=[$7], CAST8=[$8], d_date_sk=[$9], cd_demo_sk=[$10], CAST0=[$11]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($3), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(=($3, _UTF-16LE'College'), =($1, _UTF-16LE'M'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out index 9aa0653d0b..b55a0d9d4a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query20.q.out @@ -71,11 +71,12 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out index 3822a36984..99c4343c7f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out @@ -76,15 +76,16 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], d_date_sk=[$4], <=[$5], >==[$6], i_item_sk=[$7], i_item_id=[$8]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out index 7231d6e16d..b90092f9cb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query22.q.out @@ -58,15 +58,16 @@ HiveSortLimit(sort0=[$4], sort1=[$0], sort2=[$1], sort3=[$2], sort4=[$3], dir0=[ HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_product_name=[$21]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($2))]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(w_warehouse_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], d_date_sk=[$4], w_warehouse_sk=[$5]) + HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($2))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(w_warehouse_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out index 0ad4660c13..65de2a6e6d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[454][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 25' is a cross product -Warning: Shuffle Join MERGEJOIN[456][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[458][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[460][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 30' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -139,13 +139,14 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_quantity=[$3], cs_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1]) @@ -168,13 +169,14 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(c_customer_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], *=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(sales=[*(CAST($4):DECIMAL(10, 0), $5)]) HiveSemiJoin(condition=[=($3, $7)], joinType=[semi]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -194,13 +196,14 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(i_item_sk=[$0], substr=[substr($4, 1, 30)]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_customer_sk=[$2], ws_quantity=[$3], ws_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0]) HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1]) @@ -223,11 +226,12 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(c_customer_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], *=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1999, 2000, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out index a210632ae4..6ed8ee2836 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[305][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[307][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -126,26 +126,28 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[AND(=($1, $10), =($2, $19))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) - HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ss_store_sk=[$2], ss_ticket_number=[$3], ss_sales_price=[$4], i_item_sk=[$5], i_current_price=[$6], i_size=[$7], i_units=[$8], i_manager_id=[$9], c_customer_sk=[$10], c_current_addr_sk=[$11], c_first_name=[$12], c_last_name=[$13], c_birth_country=[$14], ca_address_sk=[$15], ca_state=[$16], ca_zip=[$17], UPPER=[$18], s_store_sk=[$19], s_store_name=[$20], s_state=[$21], s_zip=[$22]) + HiveJoin(condition=[AND(=($1, $10), =($2, $19))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[AND(=($17, _UTF-16LE'orchid'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[$1], ca_zip=[$2], UPPER=[$3], s_store_sk=[$4], s_store_name=[$5], s_state=[$6], s_zip=[$7]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(_o__c0=[*(0.05:DECIMAL(3, 2), CAST(/($0, $1)):DECIMAL(21, 6))]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(21, 6))]) HiveProject($f0=[$0], $f1=[$1]) @@ -158,17 +160,19 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[$1], ca_zip=[$2], UPPER=[$3], s_store_sk=[$4], s_store_name=[$5], s_state=[$6], s_zip=[$7]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($9))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($0), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out index 25b5e69d71..43e1b0aa6c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query25.q.out @@ -115,31 +115,32 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(=($2, $14), =($1, $13), =($4, $15))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_net_profit=[$3], d_date_sk=[$4], sr_returned_date_sk=[$5], sr_item_sk=[$6], sr_customer_sk=[$7], sr_ticket_number=[$8], sr_net_loss=[$9], d_date_sk0=[$10]) - HiveJoin(condition=[AND(=($7, $1), =($6, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), BETWEEN(false, $8, 4, 10), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_net_loss=[$4], d_date_sk=[$5]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_net_loss=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ticket_number=[$4], ss_net_profit=[$5], d_date_sk=[$6], cs_sold_date_sk=[$7], cs_bill_customer_sk=[$8], cs_item_sk=[$9], cs_net_profit=[$10], d_date_sk0=[$11], sr_returned_date_sk=[$12], sr_item_sk=[$13], sr_customer_sk=[$14], sr_ticket_number=[$15], sr_net_loss=[$16], d_date_sk00=[$17]) + HiveJoin(condition=[AND(=($2, $14), =($1, $13), =($4, $15))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_net_profit=[$3], d_date_sk=[$4], sr_returned_date_sk=[$5], sr_item_sk=[$6], sr_customer_sk=[$7], sr_ticket_number=[$8], sr_net_loss=[$9], d_date_sk0=[$10]) + HiveJoin(condition=[AND(=($7, $1), =($6, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), BETWEEN(false, $8, 4, 10), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_net_loss=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), BETWEEN(false, $8, 4, 10), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out index dcab8e83c5..9462045f3f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query26.q.out @@ -58,19 +58,20 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_promo_sk=[$16], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[AND(OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N')), IS NOT NULL($0))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$1], cs_item_sk=[$2], cs_promo_sk=[$3], cs_quantity=[$4], cs_list_price=[$5], cs_sales_price=[$6], cs_coupon_amt=[$7], cd_demo_sk=[$8], d_date_sk=[$9], p_promo_sk=[$10]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_promo_sk=[$16], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[AND(OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out index c0f1462b22..07cb54648f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query27.q.out @@ -63,19 +63,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_store_sk=[$7], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'U'), =($3, _UTF-16LE'2 yr Degree'), =($1, _UTF-16LE'M'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_cdemo_sk=[$2], ss_store_sk=[$3], ss_quantity=[$4], ss_list_price=[$5], ss_sales_price=[$6], ss_coupon_amt=[$7], cd_demo_sk=[$8], d_date_sk=[$9], s_store_sk=[$10], s_state=[$11]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_store_sk=[$7], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'U'), =($3, _UTF-16LE'2 yr Degree'), =($1, _UTF-16LE'M'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[AND(IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out index a08138a1e6..d13fbba05d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query29.q.out @@ -122,22 +122,23 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(=($2, $9), =($1, $8), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_store_sk=[$3], ss_ticket_number=[$4], ss_quantity=[$5], d_date_sk=[$6], sr_returned_date_sk=[$7], sr_item_sk=[$8], sr_customer_sk=[$9], sr_ticket_number=[$10], sr_return_quantity=[$11], d_date_sk0=[$12]) + HiveJoin(condition=[AND(=($2, $9), =($1, $8), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 4, 7), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), BETWEEN(false, $8, 4, 7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out index 6fd6bfc668..9fc3b45105 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query30.q.out @@ -89,13 +89,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0), IS NOT NULL($10))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$1], wr_returning_addr_sk=[$2], wr_return_amt=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) @@ -105,11 +106,12 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$1], wr_returning_addr_sk=[$2], wr_return_amt=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out index 5fd5f94034..36fec89400 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query31.q.out @@ -121,39 +121,42 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 2), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -163,37 +166,40 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 2), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out index 8419b7067a..6ea1156614 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query33.q.out @@ -182,13 +182,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_addr_sk=[$2], ss_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -205,13 +206,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$1], cs_item_sk=[$2], cs_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -228,11 +230,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_addr_sk=[$2], ws_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out index 19540430e3..b71e337515 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out @@ -69,13 +69,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -84,13 +85,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -99,11 +101,12 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out index 4f566d0952..2aedce0e66 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out @@ -74,13 +74,14 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(i_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) @@ -93,13 +94,14 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(i_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 5), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 5), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out index 7b01a6e8b9..64e2947092 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query4.q.out @@ -236,83 +236,91 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($4, $0), CASE($8, CASE($11, >(/($1, $10), /($3, $7)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f8=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject($f0=[$0], $f8=[$1], $f00=[$2], $f80=[$3], $f000=[$4], $f800=[$5], customer_id=[$6], year_total=[$7], CAST=[$8], customer_id0=[$9], year_total0=[$10], CAST0=[$11]) + HiveJoin(condition=[AND(=($4, $0), CASE($8, CASE($11, >(/($1, $10), /($3, $7)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$1], $f00=[$2], $f80=[$3], customer_id=[$4], year_total=[$5], CAST=[$6], customer_id0=[$7], year_total0=[$8], CAST0=[$9]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$7]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out index 312ac30dd4..f590dce3a7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query40.q.out @@ -73,19 +73,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(w_warehouse_sk=[$0], w_state=[$10]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveJoin(condition=[=($11, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($3, $6), =($2, $5))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$14], cs_item_sk=[$15], cs_order_number=[$17], cs_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($0), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($2))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$1], cs_item_sk=[$2], cs_order_number=[$3], cs_sales_price=[$4], cr_item_sk=[$5], cr_order_number=[$6], cr_refunded_cash=[$7], d_date_sk=[$8], <=[$9], >==[$10], i_item_sk=[$11], i_item_id=[$12]) + HiveJoin(condition=[=($11, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($2, $5))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$14], cs_item_sk=[$15], cs_order_number=[$17], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($0), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($2))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out index a7491f55d8..d686007a81 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query46.q.out @@ -97,19 +97,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ HiveProject(ca_address_sk=[$0], ca_city=[$6]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), IN($7, 6, 0), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_store_sk=[$4], ss_ticket_number=[$5], ss_coupon_amt=[$6], ss_net_profit=[$7], d_date_sk=[$8], s_store_sk=[$9], hd_demo_sk=[$10]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), IN($7, 6, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out index 60d80e010f..4a3f5a667e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query47.q.out @@ -123,27 +123,7 @@ HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], su HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), =($14, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[+($5, 1)]) - HiveFilter(condition=[IS NOT NULL($5)]) - HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) - HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) - HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) @@ -151,27 +131,51 @@ HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], su HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) - HiveFilter(condition=[AND(=($4, 2000), >($7, 0:DECIMAL(1, 0)), CASE(>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($8))]) - HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $1, $0, $4, $5, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) - HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) - HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[$5], (tok_table_or_col i_category)0=[$6], (tok_table_or_col i_brand)0=[$7], (tok_table_or_col s_store_name)0=[$8], (tok_table_or_col s_company_name)0=[$9], (tok_table_or_col d_year)=[$10], (tok_table_or_col d_moy)=[$11], (tok_function sum (tok_table_or_col ss_sales_price))0=[$12], avg_window_0=[$13], rank_window_1=[$14]) + HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), =($14, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[+($5, 1)]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) + HiveFilter(condition=[AND(=($4, 2000), >($7, 0:DECIMAL(1, 0)), CASE(>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($8))]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col s_store_name)=[$4], (tok_table_or_col s_company_name)=[$5], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $1, $0, $4, $5, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $5 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4, $5 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], s_store_name=[$4], s_company_name=[$5], $f6=[$6]) + HiveAggregate(group=[{1, 2, 8, 9, 11, 12}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out index 97c9f47e38..a8085c6bfa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out @@ -146,19 +146,22 @@ HiveAggregate(group=[{}], agg#0=[sum($10)]) HiveProject(s_store_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $10), AND($2, $11), AND($3, $12)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0:DECIMAL(12, 2), 2000:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 3000:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 25000:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0], IN=[$1], IN2=[$2], IN3=[$3], cd_demo_sk=[$4], ss_sold_date_sk=[$5], ss_cdemo_sk=[$6], ss_addr_sk=[$7], ss_store_sk=[$8], ss_quantity=[$9], BETWEEN=[$10], BETWEEN6=[$11], BETWEEN7=[$12], d_date_sk=[$13]) + HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $10), AND($2, $11), AND($3, $12)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0], ss_sold_date_sk=[$1], ss_cdemo_sk=[$2], ss_addr_sk=[$3], ss_store_sk=[$4], ss_quantity=[$5], BETWEEN=[$6], BETWEEN6=[$7], BETWEEN7=[$8], d_date_sk=[$9]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$1], ss_addr_sk=[$2], ss_store_sk=[$3], ss_quantity=[$4], BETWEEN=[$5], BETWEEN6=[$6], BETWEEN7=[$7], d_date_sk=[$8]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0:DECIMAL(12, 2), 2000:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 3000:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 25000:DECIMAL(12, 2))]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out index 99569a8431..70399eaaee 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out @@ -285,13 +285,14 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(wr_item_sk=[$2], wr_order_number=[$13], CASE=[CASE(IS NOT NULL($14), $14, 0)], CASE3=[CASE(IS NOT NULL($15), $15, 0:DECIMAL(12, 2))]) HiveFilter(condition=[AND(>($15, 10000:DECIMAL(5, 0)), IS NOT NULL($13), IS NOT NULL($2))]) HiveTableScan(table=[[default, web_returns]], table:alias=[wr]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], CASE=[CASE(IS NOT NULL($18), $18, 0)], CASE4=[CASE(IS NOT NULL($29), $29, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], CASE=[$3], CASE4=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], CASE=[CASE(IS NOT NULL($18), $18, 0)], CASE4=[CASE(IS NOT NULL($29), $29, 0:DECIMAL(12, 2))]) + HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'catalog':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) @@ -301,13 +302,14 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(cr_item_sk=[$2], cr_order_number=[$16], CASE=[CASE(IS NOT NULL($17), $17, 0)], CASE3=[CASE(IS NOT NULL($18), $18, 0:DECIMAL(12, 2))]) HiveFilter(condition=[AND(>($18, 10000:DECIMAL(5, 0)), IS NOT NULL($16), IS NOT NULL($2))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], CASE=[CASE(IS NOT NULL($18), $18, 0)], CASE4=[CASE(IS NOT NULL($29), $29, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_order_number=[$2], CASE=[$3], CASE4=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], CASE=[CASE(IS NOT NULL($18), $18, 0)], CASE4=[CASE(IS NOT NULL($29), $29, 0:DECIMAL(12, 2))]) + HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0), IS NOT NULL($17), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) @@ -317,11 +319,12 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], CASE=[CASE(IS NOT NULL($10), $10, 0)], CASE3=[CASE(IS NOT NULL($11), $11, 0:DECIMAL(12, 2))]) HiveFilter(condition=[AND(>($11, 10000:DECIMAL(5, 0)), IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_returns]], table:alias=[sr]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], CASE=[CASE(IS NOT NULL($10), $10, 0)], CASE4=[CASE(IS NOT NULL($20), $20, 0:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(>($22, 1:DECIMAL(1, 0)), >($20, 0:DECIMAL(1, 0)), >($10, 0), IS NOT NULL($0), IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$2], CASE=[$3], CASE4=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], CASE=[CASE(IS NOT NULL($10), $10, 0)], CASE4=[CASE(IS NOT NULL($20), $20, 0:DECIMAL(12, 2))]) + HiveFilter(condition=[AND(>($22, 1:DECIMAL(1, 0)), >($20, 0:DECIMAL(1, 0)), >($10, 0), IS NOT NULL($0), IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out index 9a09d8996d..f2e6bbb45a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query5.q.out @@ -304,18 +304,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) - HiveUnion(all=[true]) - HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$18], net_loss=[$26]) - HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5], d_date_sk=[$6]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$18], net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out index 4fe81c9dfd..7dcea9e500 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query50.q.out @@ -135,13 +135,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($9), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($9), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out index a440eb2eed..010333292f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query53.q.out @@ -73,15 +73,16 @@ HiveSortLimit(sort0=[$2], sort1=[$1], sort2=[$0], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(s_store_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], d_qoy=[$10]) - HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_sales_price=[$3], i_item_sk=[$4], i_manufact_id=[$5], d_date_sk=[$6], d_qoy=[$7]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'reference', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_qoy=[$10]) + HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out index bb2caccc26..a72e2edec5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[275][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[275][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain cbo with my_customers as ( select distinct c_customer_sk @@ -163,22 +163,23 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment'), =($12, _UTF-16LE'Jewelry'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], d_date_sk=[$3], i_item_sk=[$4]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment'), =($12, _UTF-16LE'Jewelry'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(cnt=[$0]) HiveFilter(condition=[<=(sq_count_check($0), 1)]) HiveProject(cnt=[$0]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out index 8a3780b20d..11d551ad49 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query56.q.out @@ -168,13 +168,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -8:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_addr_sk=[$2], ss_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -191,13 +192,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -8:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$1], cs_item_sk=[$2], cs_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -214,11 +216,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -8:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_addr_sk=[$2], ws_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out index f0dd0f2f37..729d08d1c4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query57.q.out @@ -116,47 +116,7 @@ HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_ HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveJoin(condition=[AND(=($5, $0), =($6, $1), =($7, $2), =($12, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[+($4, 1)]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) - HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) - HiveFilter(condition=[AND(=($3, 2000), >($6, 0:DECIMAL(1, 0)), CASE(>($6, 0:DECIMAL(1, 0)), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($7))]) - HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $1, $0, $4, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) - HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$1], cs_item_sk=[$2], cs_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6], cc_call_center_sk=[$7], cc_name=[$8]) HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) @@ -168,4 +128,48 @@ HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_ HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[$4], (tok_table_or_col i_category)0=[$5], (tok_table_or_col i_brand)0=[$6], (tok_table_or_col cc_name)0=[$7], (tok_table_or_col d_year)=[$8], (tok_table_or_col d_moy)=[$9], (tok_function sum (tok_table_or_col cs_sales_price))0=[$10], avg_window_0=[$11], rank_window_1=[$12]) + HiveJoin(condition=[AND(=($5, $0), =($6, $1), =($7, $2), =($12, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[+($4, 1)]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$1], cs_item_sk=[$2], cs_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6], cc_call_center_sk=[$7], cc_name=[$8]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) + HiveFilter(condition=[AND(=($3, 2000), >($6, 0:DECIMAL(1, 0)), CASE(>($6, 0:DECIMAL(1, 0)), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($7))]) + HiveProject((tok_table_or_col i_category)=[$1], (tok_table_or_col i_brand)=[$0], (tok_table_or_col cc_name)=[$4], (tok_table_or_col d_year)=[$2], (tok_table_or_col d_moy)=[$3], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $1, $0, $4, $2 ORDER BY $1 NULLS FIRST, $0 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $1, $0, $4 ORDER BY $2 NULLS LAST, $3 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(i_brand=[$0], i_category=[$1], d_year=[$2], d_moy=[$3], cc_name=[$4], $f5=[$5]) + HiveAggregate(group=[{1, 2, 8, 9, 11}], agg#0=[sum($6)]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$1], cs_item_sk=[$2], cs_sales_price=[$3], d_date_sk=[$4], d_year=[$5], d_moy=[$6], cc_call_center_sk=[$7], cc_name=[$8]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out index 52e9e79860..35c9bedb77 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[405][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[408][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 23' is a cross product PREHOOK: query: explain cbo with ss_items as (select i_item_id item_id @@ -165,16 +165,17 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0], d_week_seq=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -196,16 +197,17 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0], d_week_seq=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -227,14 +229,15 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date=[$2], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0], d_week_seq=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out index abc5d999b5..80fbb75ed9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out @@ -100,28 +100,10 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) - HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d]) - HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], d_week_seq=[$9]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) @@ -131,6 +113,26 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out index 29e0d43033..87d3dd8c3b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[173][bigTable=?] in task 'Reducer 15' is a cross product +Warning: Map Join MAPJOIN[175][bigTable=?] in task 'Reducer 17' is a cross product PREHOOK: query: explain cbo select a.ca_state state, count(*) cnt from customer_address a @@ -72,14 +72,15 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_sales]], table:alias=[s]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_month_seq=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d]) - HiveProject(d_month_seq=[$0]) - HiveAggregate(group=[{3}]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 2), IS NOT NULL($3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject(d_month_seq=[$0]) + HiveAggregate(group=[{3}]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 2), IS NOT NULL($3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_state=[$3]) HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) @@ -93,18 +94,19 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_category=[$12]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($0), IS NOT NULL($12))]) HiveTableScan(table=[[default, item]], table:alias=[i]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_category=[$0], *=[*(1.2:DECIMAL(2, 1), CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) - HiveFilter(condition=[IS NOT NULL(CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) - HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)]) - HiveFilter(condition=[IS NOT NULL($12)]) - HiveTableScan(table=[[default, item]], table:alias=[j]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject(d_month_seq=[$0]) - HiveAggregate(group=[{3}]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_category=[$0], *=[$1], cnt=[$2]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_category=[$0], *=[*(1.2:DECIMAL(2, 1), CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) + HiveFilter(condition=[IS NOT NULL(CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) + HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)]) + HiveFilter(condition=[IS NOT NULL($12)]) + HiveTableScan(table=[[default, item]], table:alias=[j]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject(d_month_seq=[$0]) + HiveAggregate(group=[{3}]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out index 8802220d23..1978dee1d9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query60.q.out @@ -188,13 +188,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_addr_sk=[$2], ss_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -211,13 +212,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$1], cs_item_sk=[$2], cs_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -234,11 +236,12 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -6:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_addr_sk=[$2], ws_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out index 35351157fb..ededec4663 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query63.q.out @@ -75,15 +75,16 @@ HiveSortLimit(sort0=[$0], sort1=[$2], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(s_store_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_manager_id=[$20]) - HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], d_moy=[$8]) - HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_sales_price=[$3], i_item_sk=[$4], i_manager_id=[$5], d_date_sk=[$6], d_moy=[$7]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_manager_id=[$20]) + HiveFilter(condition=[AND(IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help', _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9', _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'), IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics', _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), OR(AND(IN($12, _UTF-16LE'Books', _UTF-16LE'Children', _UTF-16LE'Electronics'), IN($10, _UTF-16LE'personal', _UTF-16LE'portable', _UTF-16LE'refernece', _UTF-16LE'self-help'), IN($8, _UTF-16LE'scholaramalgamalg #14', _UTF-16LE'scholaramalgamalg #7', _UTF-16LE'exportiunivamalg #9', _UTF-16LE'scholaramalgamalg #9')), AND(IN($12, _UTF-16LE'Women', _UTF-16LE'Music', _UTF-16LE'Men'), IN($10, _UTF-16LE'accessories', _UTF-16LE'classical', _UTF-16LE'fragrances', _UTF-16LE'pants'), IN($8, _UTF-16LE'amalgimporto #1', _UTF-16LE'edu packscholar #1', _UTF-16LE'exportiimporto #1', _UTF-16LE'importoamalg #1'))), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0], d_moy=[$8]) + HiveFilter(condition=[AND(IN($3, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out index 35b52752a1..d0dc52f6c7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out @@ -276,78 +276,84 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($35, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], c_first_shipto_date_sk=[$4], c_first_sales_date_sk=[$5], d_date_sk=[$6], d_year=[$7], d_date_sk0=[$8], d_year0=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], ib_income_band_sk=[$12], ca_address_sk=[$13], ca_street_number=[$14], ca_street_name=[$15], ca_city=[$16], ca_zip=[$17], cd_demo_sk=[$18], cd_marital_status=[$19], sr_item_sk=[$20], sr_ticket_number=[$21], ca_address_sk0=[$22], ca_street_number0=[$23], ca_street_name0=[$24], ca_city0=[$25], ca_zip0=[$26], s_store_sk=[$27], s_store_name=[$28], s_zip=[$29], hd_demo_sk0=[$30], hd_income_band_sk0=[$31], p_promo_sk=[$32], ss_sold_date_sk=[$33], ss_item_sk=[$34], ss_customer_sk=[$35], ss_cdemo_sk=[$36], ss_hdemo_sk=[$37], ss_addr_sk=[$38], ss_store_sk=[$39], ss_promo_sk=[$40], ss_ticket_number=[$41], ss_wholesale_cost=[$42], ss_list_price=[$43], ss_coupon_amt=[$44], i_item_sk=[$45], i_product_name=[$46], d_date_sk1=[$47], $f0=[$48]) + HiveJoin(condition=[=($35, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(d_date_sk=[$0], d_year=[$6]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(ib_income_band_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], p_promo_sk=[$12], ss_sold_date_sk=[$13], ss_item_sk=[$14], ss_customer_sk=[$15], ss_cdemo_sk=[$16], ss_hdemo_sk=[$17], ss_addr_sk=[$18], ss_store_sk=[$19], ss_promo_sk=[$20], ss_ticket_number=[$21], ss_wholesale_cost=[$22], ss_list_price=[$23], ss_coupon_amt=[$24], i_item_sk=[$25], i_product_name=[$26], d_date_sk=[$27], $f0=[$28]) - HiveJoin(condition=[AND(=($14, $0), =($21, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveJoin(condition=[=($1, $15)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($17))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], p_promo_sk=[$12], ss_sold_date_sk=[$13], ss_item_sk=[$14], ss_customer_sk=[$15], ss_cdemo_sk=[$16], ss_hdemo_sk=[$17], ss_addr_sk=[$18], ss_store_sk=[$19], ss_promo_sk=[$20], ss_ticket_number=[$21], ss_wholesale_cost=[$22], ss_list_price=[$23], ss_coupon_amt=[$24], i_item_sk=[$25], i_product_name=[$26], d_date_sk=[$27], $f0=[$28]) + HiveJoin(condition=[AND(=($14, $0), =($21, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$1], ca_street_name=[$2], ca_city=[$3], ca_zip=[$4], s_store_sk=[$5], s_store_name=[$6], s_zip=[$7], hd_demo_sk=[$8], hd_income_band_sk=[$9], p_promo_sk=[$10], ss_sold_date_sk=[$11], ss_item_sk=[$12], ss_customer_sk=[$13], ss_cdemo_sk=[$14], ss_hdemo_sk=[$15], ss_addr_sk=[$16], ss_store_sk=[$17], ss_promo_sk=[$18], ss_ticket_number=[$19], ss_wholesale_cost=[$20], ss_list_price=[$21], ss_coupon_amt=[$22], i_item_sk=[$23], i_product_name=[$24], d_date_sk=[$25], $f0=[$26]) + HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveProject(s_store_sk=[$0], s_store_name=[$1], s_zip=[$2], hd_demo_sk=[$3], hd_income_band_sk=[$4], p_promo_sk=[$5], ss_sold_date_sk=[$6], ss_item_sk=[$7], ss_customer_sk=[$8], ss_cdemo_sk=[$9], ss_hdemo_sk=[$10], ss_addr_sk=[$11], ss_store_sk=[$12], ss_promo_sk=[$13], ss_ticket_number=[$14], ss_wholesale_cost=[$15], ss_list_price=[$16], ss_coupon_amt=[$17], i_item_sk=[$18], i_product_name=[$19], d_date_sk=[$20], $f0=[$21]) + HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], p_promo_sk=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_cdemo_sk=[$6], ss_hdemo_sk=[$7], ss_addr_sk=[$8], ss_store_sk=[$9], ss_promo_sk=[$10], ss_ticket_number=[$11], ss_wholesale_cost=[$12], ss_list_price=[$13], ss_coupon_amt=[$14], i_item_sk=[$15], i_product_name=[$16], d_date_sk=[$17], $f0=[$18]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveProject(p_promo_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_product_name=[$14], d_date_sk=[$15], $f0=[$16]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8], ss_wholesale_cost=[$9], ss_list_price=[$10], ss_coupon_amt=[$11], i_item_sk=[$12], i_product_name=[$13], d_date_sk=[$14], $f0=[$15]) + HiveJoin(condition=[=($1, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($17))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) HiveProject(ib_income_band_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) @@ -360,78 +366,84 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($35, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], c_first_shipto_date_sk=[$4], c_first_sales_date_sk=[$5], d_date_sk=[$6], d_year=[$7], d_date_sk0=[$8], d_year0=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], ib_income_band_sk=[$12], ca_address_sk=[$13], ca_street_number=[$14], ca_street_name=[$15], ca_city=[$16], ca_zip=[$17], cd_demo_sk=[$18], cd_marital_status=[$19], sr_item_sk=[$20], sr_ticket_number=[$21], ca_address_sk0=[$22], ca_street_number0=[$23], ca_street_name0=[$24], ca_city0=[$25], ca_zip0=[$26], s_store_sk=[$27], s_store_name=[$28], s_zip=[$29], hd_demo_sk0=[$30], hd_income_band_sk0=[$31], p_promo_sk=[$32], ss_sold_date_sk=[$33], ss_item_sk=[$34], ss_customer_sk=[$35], ss_cdemo_sk=[$36], ss_hdemo_sk=[$37], ss_addr_sk=[$38], ss_store_sk=[$39], ss_promo_sk=[$40], ss_ticket_number=[$41], ss_wholesale_cost=[$42], ss_list_price=[$43], ss_coupon_amt=[$44], i_item_sk=[$45], i_product_name=[$46], d_date_sk1=[$47], $f0=[$48]) + HiveJoin(condition=[=($35, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(d_date_sk=[$0], d_year=[$6]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(ib_income_band_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], p_promo_sk=[$12], ss_sold_date_sk=[$13], ss_item_sk=[$14], ss_customer_sk=[$15], ss_cdemo_sk=[$16], ss_hdemo_sk=[$17], ss_addr_sk=[$18], ss_store_sk=[$19], ss_promo_sk=[$20], ss_ticket_number=[$21], ss_wholesale_cost=[$22], ss_list_price=[$23], ss_coupon_amt=[$24], i_item_sk=[$25], i_product_name=[$26], d_date_sk=[$27], $f0=[$28]) - HiveJoin(condition=[AND(=($14, $0), =($21, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveJoin(condition=[=($1, $15)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($17))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], p_promo_sk=[$12], ss_sold_date_sk=[$13], ss_item_sk=[$14], ss_customer_sk=[$15], ss_cdemo_sk=[$16], ss_hdemo_sk=[$17], ss_addr_sk=[$18], ss_store_sk=[$19], ss_promo_sk=[$20], ss_ticket_number=[$21], ss_wholesale_cost=[$22], ss_list_price=[$23], ss_coupon_amt=[$24], i_item_sk=[$25], i_product_name=[$26], d_date_sk=[$27], $f0=[$28]) + HiveJoin(condition=[AND(=($14, $0), =($21, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$1], ca_street_name=[$2], ca_city=[$3], ca_zip=[$4], s_store_sk=[$5], s_store_name=[$6], s_zip=[$7], hd_demo_sk=[$8], hd_income_band_sk=[$9], p_promo_sk=[$10], ss_sold_date_sk=[$11], ss_item_sk=[$12], ss_customer_sk=[$13], ss_cdemo_sk=[$14], ss_hdemo_sk=[$15], ss_addr_sk=[$16], ss_store_sk=[$17], ss_promo_sk=[$18], ss_ticket_number=[$19], ss_wholesale_cost=[$20], ss_list_price=[$21], ss_coupon_amt=[$22], i_item_sk=[$23], i_product_name=[$24], d_date_sk=[$25], $f0=[$26]) + HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveProject(s_store_sk=[$0], s_store_name=[$1], s_zip=[$2], hd_demo_sk=[$3], hd_income_band_sk=[$4], p_promo_sk=[$5], ss_sold_date_sk=[$6], ss_item_sk=[$7], ss_customer_sk=[$8], ss_cdemo_sk=[$9], ss_hdemo_sk=[$10], ss_addr_sk=[$11], ss_store_sk=[$12], ss_promo_sk=[$13], ss_ticket_number=[$14], ss_wholesale_cost=[$15], ss_list_price=[$16], ss_coupon_amt=[$17], i_item_sk=[$18], i_product_name=[$19], d_date_sk=[$20], $f0=[$21]) + HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], p_promo_sk=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_cdemo_sk=[$6], ss_hdemo_sk=[$7], ss_addr_sk=[$8], ss_store_sk=[$9], ss_promo_sk=[$10], ss_ticket_number=[$11], ss_wholesale_cost=[$12], ss_list_price=[$13], ss_coupon_amt=[$14], i_item_sk=[$15], i_product_name=[$16], d_date_sk=[$17], $f0=[$18]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveProject(p_promo_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_product_name=[$14], d_date_sk=[$15], $f0=[$16]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8], ss_wholesale_cost=[$9], ss_list_price=[$10], ss_coupon_amt=[$11], i_item_sk=[$12], i_product_name=[$13], d_date_sk=[$14], $f0=[$15]) + HiveJoin(condition=[=($1, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($17))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) HiveProject(ib_income_band_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out index ef48ec3ccf..1f93853837 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query65.q.out @@ -71,32 +71,34 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_desc=[$4], i_current_price=[$5], i_wholesale_cost=[$6], i_brand=[$8]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], *=[*(0.1:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) - HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) - HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$1], $f0=[$2], $f1=[$3], $f2=[$4], $f00=[$5], *=[$6]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f00=[$3], *=[$4]) + HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], *=[*(0.1:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out index 3255a9fb95..29464480ec 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query66.q.out @@ -468,21 +468,22 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveJoin(condition=[=($2, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$14], ws_warehouse_sk=[$15], *=[*($21, CAST($18):DECIMAL(10, 0))], *5=[*($30, CAST($18):DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($14))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(t_time_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $2, 49530, 78330), IS NOT NULL($0))]) - HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveProject(d_date_sk=[$0], ==[=($8, 1)], =2=[=($8, 2)], =3=[=($8, 3)], =4=[=($8, 4)], =5=[=($8, 5)], =6=[=($8, 6)], =7=[=($8, 7)], =8=[=($8, 8)], =9=[=($8, 9)], =10=[=($8, 10)], =11=[=($8, 11)], =12=[=($8, 12)]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$2], ws_warehouse_sk=[$3], *=[$4], *5=[$5], t_time_sk=[$6], d_date_sk=[$7], ==[$8], =2=[$9], =3=[$10], =4=[$11], =5=[$12], =6=[$13], =7=[$14], =8=[$15], =9=[$16], =10=[$17], =11=[$18], =12=[$19], sm_ship_mode_sk=[$20]) + HiveJoin(condition=[=($2, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$14], ws_warehouse_sk=[$15], *=[*($21, CAST($18):DECIMAL(10, 0))], *5=[*($30, CAST($18):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($14))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $2, 49530, 78330), IS NOT NULL($0))]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(d_date_sk=[$0], ==[=($8, 1)], =2=[=($8, 2)], =3=[=($8, 3)], =4=[=($8, 4)], =5=[=($8, 5)], =6=[=($8, 6)], =7=[=($8, 7)], =8=[=($8, 8)], =9=[=($8, 9)], =10=[=($8, 10)], =11=[=($8, 11)], =12=[=($8, 12)]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sm_ship_mode_sk=[$0]) + HiveFilter(condition=[AND(IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)]) HiveProject($f0=[$22], $f1=[$23], $f2=[$24], $f3=[$25], $f4=[$26], $f5=[$27], $f7=[CASE($8, $4, 0:DECIMAL(18, 2))], $f8=[CASE($9, $4, 0:DECIMAL(18, 2))], $f9=[CASE($10, $4, 0:DECIMAL(18, 2))], $f10=[CASE($11, $4, 0:DECIMAL(18, 2))], $f11=[CASE($12, $4, 0:DECIMAL(18, 2))], $f12=[CASE($13, $4, 0:DECIMAL(18, 2))], $f13=[CASE($14, $4, 0:DECIMAL(18, 2))], $f14=[CASE($15, $4, 0:DECIMAL(18, 2))], $f15=[CASE($16, $4, 0:DECIMAL(18, 2))], $f16=[CASE($17, $4, 0:DECIMAL(18, 2))], $f17=[CASE($18, $4, 0:DECIMAL(18, 2))], $f18=[CASE($19, $4, 0:DECIMAL(18, 2))], $f19=[CASE($8, $5, 0:DECIMAL(18, 2))], $f20=[CASE($9, $5, 0:DECIMAL(18, 2))], $f21=[CASE($10, $5, 0:DECIMAL(18, 2))], $f22=[CASE($11, $5, 0:DECIMAL(18, 2))], $f23=[CASE($12, $5, 0:DECIMAL(18, 2))], $f24=[CASE($13, $5, 0:DECIMAL(18, 2))], $f25=[CASE($14, $5, 0:DECIMAL(18, 2))], $f26=[CASE($15, $5, 0:DECIMAL(18, 2))], $f27=[CASE($16, $5, 0:DECIMAL(18, 2))], $f28=[CASE($17, $5, 0:DECIMAL(18, 2))], $f29=[CASE($18, $5, 0:DECIMAL(18, 2))], $f30=[CASE($19, $5, 0:DECIMAL(18, 2))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out index 6032192938..f1b591a9e9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query68.q.out @@ -111,19 +111,20 @@ HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0], ca_city=[$6]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 1998, 1999, 2000), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_store_sk=[$4], ss_ticket_number=[$5], ss_ext_sales_price=[$6], ss_ext_list_price=[$7], ss_ext_tax=[$8], d_date_sk=[$9], s_store_sk=[$10], hd_demo_sk=[$11]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 1998, 1999, 2000), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[AND(IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(OR(=($3, 2), =($4, 1)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out index ff14d6dd25..41b4c3c85a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query7.q.out @@ -58,19 +58,20 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_promo_sk=[$8], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[AND(OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N')), IS NOT NULL($0))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_cdemo_sk=[$2], ss_promo_sk=[$3], ss_quantity=[$4], ss_list_price=[$5], ss_sales_price=[$6], ss_coupon_amt=[$7], cd_demo_sk=[$8], d_date_sk=[$9], p_promo_sk=[$10]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_promo_sk=[$8], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[AND(OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out index e1c8bc4a6a..f8eeec31f1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query71.q.out @@ -97,34 +97,35 @@ HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4 HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4]) HiveFilter(condition=[AND(IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner'), IS NOT NULL($0))]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) - HiveFilter(condition=[AND(=($20, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2], i_item_sk=[$3], i_brand_id=[$4], i_brand=[$5]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) + HiveFilter(condition=[AND(=($20, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out index c38ed02ebd..cb07981cad 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out @@ -99,29 +99,31 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL(CAST($2):DOUBLE))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_desc=[$4]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($5, $13)], joinType=[left], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($18), IS NOT NULL($5), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], +=[+(CAST($2):DOUBLE, 5)]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL(CAST($2):DOUBLE))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1001-5000'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(p_promo_sk=[$0]) + HiveProject(i_item_sk=[$0], i_item_desc=[$1], cs_sold_date_sk=[$2], cs_ship_date_sk=[$3], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$6], cs_promo_sk=[$7], cs_order_number=[$8], cs_quantity=[$9], d_date_sk=[$10], d_week_seq=[$11], +=[$12], cd_demo_sk=[$13], hd_demo_sk=[$14], p_promo_sk=[$15]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$1], cs_bill_cdemo_sk=[$2], cs_bill_hdemo_sk=[$3], cs_item_sk=[$4], cs_promo_sk=[$5], cs_order_number=[$6], cs_quantity=[$7], d_date_sk=[$8], d_week_seq=[$9], +=[$10], cd_demo_sk=[$11], hd_demo_sk=[$12], p_promo_sk=[$13]) + HiveJoin(condition=[=($5, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($18), IS NOT NULL($5), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], +=[+(CAST($2):DOUBLE, 5)]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL(CAST($2):DOUBLE))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1001-5000'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(d_date_sk=[$0], d_week_seq=[$4]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out index 6b180bd064..68f832aadf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query74.q.out @@ -138,54 +138,59 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f4=[$3]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f4=[$3]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_net_paid=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f4=[$1], $f00=[$2], $f40=[$3], customer_id=[$4], year_total=[$5], CAST=[$6]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f4=[$3]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], ws_net_paid=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f4=[$3]) + HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_net_paid=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], ws_net_paid=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out index 73b94521e5..d6b814a1c1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query75.q.out @@ -174,49 +174,52 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($2))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($2))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) @@ -232,47 +235,50 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($2))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($2))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($2))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out index 9eb6c1d386..8889c35249 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query76.q.out @@ -67,13 +67,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ HiveProject(d_date_sk=[$0], d_year=[$6], d_qoy=[$10]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_category=[$12]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NULL($6), IS NOT NULL($0), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_category=[$1], ss_sold_date_sk=[$2], ss_item_sk=[$3], ss_ext_sales_price=[$4]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_category=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NULL($6), IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(channel=[_UTF-16LE'web':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], col_name=[_UTF-16LE'ws_web_page_sk':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], d_year=[$6], d_qoy=[$7], i_category=[$4], ext_sales_price=[$2]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out index f3d481107e..5790516839 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[322][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[325][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain cbo with ss as (select s_store_sk, @@ -258,13 +258,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(s_store_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_store_sk=[$7], sr_return_amt=[$11], sr_net_loss=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sr_returned_date_sk=[$0], sr_store_sk=[$1], sr_return_amt=[$2], sr_net_loss=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_store_sk=[$7], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[$0], sales=[$1], returns=[$3], profit=[-($2, $4)]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_call_center_sk=[$0], $f1=[$1], $f2=[$2]) @@ -293,24 +294,26 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(wp_web_page_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_web_page_sk=[$12], ws_ext_sales_price=[$23], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_web_page_sk=[$1], ws_ext_sales_price=[$2], ws_net_profit=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_web_page_sk=[$12], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(wp_web_page_sk=[$0], $f1=[$1], $f2=[$2]) HiveAggregate(group=[{0}], agg#0=[sum($3)], agg#1=[sum($4)]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wp_web_page_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_returned_date_sk=[$0], wr_web_page_sk=[$11], wr_return_amt=[$15], wr_net_loss=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wr_returned_date_sk=[$0], wr_web_page_sk=[$1], wr_return_amt=[$2], wr_net_loss=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_web_page_sk=[$11], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out index 634fc836b8..fada1767d3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query80.q.out @@ -227,25 +227,27 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_net_profit=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($7), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_amt=[$11], sr_net_loss=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(i_item_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_store_sk=[$3], ss_promo_sk=[$4], ss_ticket_number=[$5], ss_ext_sales_price=[$6], ss_net_profit=[$7], sr_item_sk=[$8], sr_ticket_number=[$9], sr_return_amt=[$10], sr_net_loss=[$11], d_date_sk=[$12], p_promo_sk=[$13]) + HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_promo_sk=[$3], ss_ticket_number=[$4], ss_ext_sales_price=[$5], ss_net_profit=[$6], sr_item_sk=[$7], sr_ticket_number=[$8], sr_return_amt=[$9], sr_net_loss=[$10], d_date_sk=[$11]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($7), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page', $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$1], $f1=[$8], $f2=[CASE(IS NOT NULL($12), $12, 0:DECIMAL(12, 2))], $f3=[-($9, CASE(IS NOT NULL($13), $13, 0:DECIMAL(12, 2)))]) @@ -253,25 +255,27 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) - HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$12], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_ext_sales_price=[$23], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($15))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_amount=[$18], cr_net_loss=[$26]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(i_item_sk=[$0], cs_sold_date_sk=[$1], cs_catalog_page_sk=[$2], cs_item_sk=[$3], cs_promo_sk=[$4], cs_order_number=[$5], cs_ext_sales_price=[$6], cs_net_profit=[$7], cr_item_sk=[$8], cr_order_number=[$9], cr_return_amount=[$10], cr_net_loss=[$11], d_date_sk=[$12], p_promo_sk=[$13]) + HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$1], cs_item_sk=[$2], cs_promo_sk=[$3], cs_order_number=[$4], cs_ext_sales_price=[$5], cs_net_profit=[$6], cr_item_sk=[$7], cr_order_number=[$8], cr_return_amount=[$9], cr_net_loss=[$10], d_date_sk=[$11]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$12], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($12), IS NOT NULL($15))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$7], $f2=[CASE(IS NOT NULL($11), $11, 0:DECIMAL(12, 2))], $f3=[-($8, CASE(IS NOT NULL($12), $12, 0:DECIMAL(12, 2)))]) @@ -280,21 +284,23 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[AND(=($11, _UTF-16LE'N'), IS NOT NULL($0))]) HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_site_sk=[$13], ws_promo_sk=[$16], ws_order_number=[$17], ws_ext_sales_price=[$23], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($13), IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($13))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_web_site_sk=[$3], ws_promo_sk=[$4], ws_order_number=[$5], ws_ext_sales_price=[$6], ws_net_profit=[$7], wr_item_sk=[$8], wr_order_number=[$9], wr_return_amt=[$10], wr_net_loss=[$11], d_date_sk=[$12]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(>($5, 50:DECIMAL(2, 0)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_web_site_sk=[$2], ws_promo_sk=[$3], ws_order_number=[$4], ws_ext_sales_price=[$5], ws_net_profit=[$6], wr_item_sk=[$7], wr_order_number=[$8], wr_return_amt=[$9], wr_net_loss=[$10], d_date_sk=[$11]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_site_sk=[$13], ws_promo_sk=[$16], ws_order_number=[$17], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($13), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out index a4534d239e..de97fc401a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query81.q.out @@ -90,13 +90,14 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_returning_addr_sk=[$2], cr_return_amt_inc_tax=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) @@ -106,11 +107,12 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_returning_addr_sk=[$2], cr_return_amt_inc_tax=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out index c8f6ded703..63272b503f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -200,17 +200,18 @@ HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($8), IS NOT NULL($4), IS NOT NULL($12), IS NOT NULL($2), IS NOT NULL($13))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(wr_item_sk=[$0], wr_refunded_cdemo_sk=[$1], wr_refunded_addr_sk=[$2], wr_returning_cdemo_sk=[$3], wr_reason_sk=[$4], wr_order_number=[$5], wr_fee=[$6], wr_refunded_cash=[$7], ca_address_sk=[$8], IN=[$9], IN2=[$10], IN3=[$11], cd_demo_sk=[$12], cd_marital_status=[$13], cd_education_status=[$14]) + HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($8), IS NOT NULL($4), IS NOT NULL($12), IS NOT NULL($2), IS NOT NULL($13))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, reason]], table:alias=[reason]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out index 2684a3ea36..805311b55f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query87.q.out @@ -74,13 +74,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1:BIGINT], $f4=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -89,13 +90,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1:BIGINT], $f4=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -104,11 +106,12 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out index 3a2aacfff0..348f13c0c2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query91.q.out @@ -85,13 +85,14 @@ HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss= HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[AND(=($11, -7:DECIMAL(1, 0)), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], cd_demo_sk=[$4], cd_marital_status=[$5], cd_education_status=[$6]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4], cc_call_center_sk=[$5], cc_call_center_id=[$6], cc_name=[$7], cc_manager=[$8]) HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out index 6a8ed39e29..114f5044a7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out @@ -49,11 +49,12 @@ HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13], *=[*(CAST($10):DECIMAL(10, 0), $13)]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10], IS NOT NULL=[IS NOT NULL($10)]) - HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(r_reason_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'Did not like the warranty'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveProject(sr_item_sk=[$0], sr_reason_sk=[$1], sr_ticket_number=[$2], sr_return_quantity=[$3], IS NOT NULL=[$4], r_reason_sk=[$5]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10], IS NOT NULL=[IS NOT NULL($10)]) + HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(r_reason_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'Did not like the warranty'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out index b8521f4189..bc55692748 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out @@ -77,13 +77,14 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[su HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2), IS NOT NULL($17))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_warehouse_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$7], d_date=[$8]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2), IS NOT NULL($17))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out index 218ca7d8b6..06ed74f2b1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out @@ -89,29 +89,32 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[su HiveProject(wr_returned_date_sk=[$0], wr_returned_time_sk=[$1], wr_item_sk=[$2], wr_refunded_customer_sk=[$3], wr_refunded_cdemo_sk=[$4], wr_refunded_hdemo_sk=[$5], wr_refunded_addr_sk=[$6], wr_returning_customer_sk=[$7], wr_returning_cdemo_sk=[$8], wr_returning_hdemo_sk=[$9], wr_returning_addr_sk=[$10], wr_web_page_sk=[$11], wr_reason_sk=[$12], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15], wr_return_tax=[$16], wr_return_amt_inc_tax=[$17], wr_fee=[$18], wr_return_ship_cost=[$19], wr_refunded_cash=[$20], wr_reversed_charge=[$21], wr_account_credit=[$22], wr_net_loss=[$23], BLOCK__OFFSET__INSIDE__FILE=[$24], INPUT__FILE__NAME=[$25], ROW__ID=[$26]) HiveFilter(condition=[IS NOT NULL($13)]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_order_number=[$0]) - HiveAggregate(group=[{1}]) - HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveFilter(condition=[IS NOT NULL($17)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveFilter(condition=[IS NOT NULL($17)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) - HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2), IS NOT NULL($17))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(web_site_sk=[$0]) - HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + HiveProject(ws_order_number=[$0], ca_address_sk=[$1], ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_order_number0=[$5], ws_ext_ship_cost=[$6], ws_net_profit=[$7], d_date_sk=[$8], d_date=[$9], web_site_sk=[$10]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(ca_address_sk=[$0], ws_ship_date_sk=[$1], ws_ship_addr_sk=[$2], ws_web_site_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$7], d_date=[$8], web_site_sk=[$9]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[AND(=($8, _UTF-16LE'TX'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$6], d_date=[$7]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2), IS NOT NULL($17))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0]) + HiveFilter(condition=[AND(=($14, _UTF-16LE'pri'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out index 8881f9e4db..37f91e50d8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query98.q.out @@ -77,11 +77,12 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) HiveFilter(condition=[AND(IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books'), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($2))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9)), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out index cc80973d91..a752b59951 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_ext_query1.q.out @@ -62,33 +62,34 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[AND(=($2, $5), >($3, $4))], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0], $f0=[$1], $f1=[$2], $f2=[$3], _o__c0=[$4], ctr_store_sk=[$5]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[AND(=($2, $5), >($3, $4))], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### PREHOOK: query: explain cbo joincost with customer_total_return as @@ -154,31 +155,32 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]): rowcount = ###Masked###, cum HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[{8.000000566321465E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(c_customer_sk=[$0], c_customer_id=[$1]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, customer]], table:alias=[customer]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[AND(=($2, $5), >($3, $4))], joinType=[inner], algorithm=[none], cost=[{9294.436653471346 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[{455187.9173657213 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0], $f0=[$1], $f1=[$2], $f2=[$3], _o__c0=[$4], ctr_store_sk=[$5]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[AND(=($2, $5), >($3, $4))], joinType=[inner], algorithm=[none], cost=[{9294.436653471346 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[{455187.9173657213 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(s_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[=($24, _UTF-16LE'NM')]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store]], table:alias=[store]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[IS NOT NULL($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.3635511784936875E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.175767820386722E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[{5.3635511784936875E7 rows, 0.0 cpu, 0.0 io}]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveProject(d_date_sk=[$0]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveFilter(condition=[=($6, 2000)]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]): rowcount = ###Masked###, cumulative cost = ###Masked###, id = ###Masked### diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out index 210badb740..7f8063b9ed 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query1.q.out @@ -62,31 +62,32 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[AND(=($2, $5), >($3, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[=($24, _UTF-16LE'NM')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) - HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) - HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveProject(s_store_sk=[$0], $f0=[$1], $f1=[$2], $f2=[$3], _o__c0=[$4], ctr_store_sk=[$5]) + HiveJoin(condition=[AND(=($2, $5), >($3, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[=($24, _UTF-16LE'NM')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($7), IS NOT NULL($0))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2000)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(sr_customer_sk=[$0], sr_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_customer_sk=[$3], sr_store_sk=[$7], sr_fee=[$14]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out index 53d470da5c..26435318fa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out @@ -165,51 +165,57 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], -=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject($f0=[$0], $f8=[$1], customer_id=[$2], year_total=[$3], CAST=[$4], $f00=[$5], $f9=[$6]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$7]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], -=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f9=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) + HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[$2], $f0=[$3], $f9=[$4]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], -=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f9=[$7]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], -=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out index cfb7945fb4..c58b024ded 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query12.q.out @@ -79,11 +79,12 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out index b87661b78b..b65619f055 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out @@ -120,17 +120,19 @@ HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[CAST(/($2, $3)):DECIMAL(11, 6)], HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $11), AND($2, $12), AND($3, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN9=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN10=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN11=[BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN12=[BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN13=[BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(12, 2), $22), <=($22, 200:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 300:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 250:DECIMAL(12, 2))))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0], IN=[$1], IN2=[$2], IN3=[$3], ss_sold_date_sk=[$4], ss_cdemo_sk=[$5], ss_hdemo_sk=[$6], ss_addr_sk=[$7], ss_quantity=[$8], ss_ext_sales_price=[$9], ss_ext_wholesale_cost=[$10], BETWEEN=[$11], BETWEEN9=[$12], BETWEEN10=[$13], BETWEEN11=[$14], BETWEEN12=[$15], BETWEEN13=[$16], d_date_sk=[$17]) + HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $11), AND($2, $12), AND($3, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_quantity=[$4], ss_ext_sales_price=[$5], ss_ext_wholesale_cost=[$6], BETWEEN=[$7], BETWEEN9=[$8], BETWEEN10=[$9], BETWEEN11=[$10], BETWEEN12=[$11], BETWEEN13=[$12], d_date_sk=[$13]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN9=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN10=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN11=[BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN12=[BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN13=[BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(12, 2), $22), <=($22, 200:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 300:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 250:DECIMAL(12, 2))))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) HiveFilter(condition=[IN($3, 3, 1)]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out index eaa1defa81..13798c9e87 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1203][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1210][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1202][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[1217][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 21' is a cross product +Warning: Shuffle Join MERGEJOIN[1228][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product PREHOOK: query: explain cbo with cross_items as (select i_item_sk ss_item_sk @@ -234,63 +234,65 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_item_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk0=[$2], ss_quantity=[$3], ss_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_quantity=[$2], ss_list_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) HiveProject($f0=[$0], $f1=[$1]) @@ -330,63 +332,65 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_item_sk=[$0], cs_sold_date_sk=[$1], cs_item_sk=[$2], cs_quantity=[$3], cs_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_quantity=[$2], cs_list_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) HiveProject($f0=[$0], $f1=[$1]) @@ -426,63 +430,65 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_item_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_quantity=[$3], ws_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0]) + HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_quantity=[$2], ws_list_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) HiveProject($f0=[$0], $f1=[$1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out index b40ca94ac6..60a9130b9d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query16.q.out @@ -81,13 +81,14 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[su HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[=($8, _UTF-16LE'NY')]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$10], cs_call_center_sk=[$11], cs_warehouse_sk=[$14], cs_order_number=[$17], cs_ext_ship_cost=[$28], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($10))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_ship_date_sk=[$0], cs_ship_addr_sk=[$1], cs_call_center_sk=[$2], cs_warehouse_sk=[$3], cs_order_number=[$4], cs_ext_ship_cost=[$5], cs_net_profit=[$6], d_date_sk=[$7], d_date=[$8]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$10], cs_call_center_sk=[$11], cs_warehouse_sk=[$14], cs_order_number=[$17], cs_ext_ship_cost=[$28], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($10))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) HiveFilter(condition=[IN($25, _UTF-16LE'Ziebach County', _UTF-16LE'Levy County', _UTF-16LE'Huron County', _UTF-16LE'Franklin Parish', _UTF-16LE'Daviess County')]) HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out index c5cd8b552d..5194f2666e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query18.q.out @@ -86,26 +86,27 @@ HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$3], sort3=[$0], dir0=[ASC], dir1=[ HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4], CAST=[CAST($13):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) - HiveFilter(condition=[IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_bill_cdemo_sk=[$2], cs_item_sk=[$3], CAST=[$4], CAST5=[$5], CAST6=[$6], CAST7=[$7], CAST8=[$8], d_date_sk=[$9], cd_demo_sk=[$10], CAST0=[$11]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 2)]) - HiveFilter(condition=[AND(=($3, _UTF-16LE'College'), =($1, _UTF-16LE'M'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_addr_sk=[$2], CAST=[$3], ca_address_sk=[$4], ca_county=[$5], ca_state=[$6], ca_country=[$7], cs_sold_date_sk=[$8], cs_bill_customer_sk=[$9], cs_bill_cdemo_sk=[$10], cs_item_sk=[$11], CAST0=[$12], CAST5=[$13], CAST6=[$14], CAST7=[$15], CAST8=[$16], d_date_sk=[$17], cd_demo_sk=[$18], CAST00=[$19]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_addr_sk=[$4], CAST=[CAST($13):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IN($12, 9, 5, 12, 4, 1, 10), IS NOT NULL($2), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8], ca_country=[$10]) + HiveFilter(condition=[IN($8, _UTF-16LE'ND', _UTF-16LE'WI', _UTF-16LE'AL', _UTF-16LE'NC', _UTF-16LE'OK', _UTF-16LE'MS', _UTF-16LE'TN')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_bill_cdemo_sk=[$2], cs_item_sk=[$3], CAST=[$4], CAST5=[$5], CAST6=[$6], CAST7=[$7], CAST8=[$8], d_date_sk=[$9], cd_demo_sk=[$10], CAST0=[$11]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], CAST=[CAST($18):DECIMAL(12, 2)], CAST5=[CAST($20):DECIMAL(12, 2)], CAST6=[CAST($27):DECIMAL(12, 2)], CAST7=[CAST($21):DECIMAL(12, 2)], CAST8=[CAST($33):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], CAST=[CAST($6):DECIMAL(12, 2)]) + HiveFilter(condition=[AND(=($3, _UTF-16LE'College'), =($1, _UTF-16LE'M'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) HiveProject(cd_demo_sk=[$0]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out index 05c9f5fefd..2e825c103f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query20.q.out @@ -71,11 +71,12 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out index fc95a2bde8..983b416e3b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out @@ -75,14 +75,15 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], d_date_sk=[$4], <=[$5], >==[$6], i_item_sk=[$7], i_item_id=[$8]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out index 77f91f696c..ec0ca7f348 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query22.q.out @@ -57,10 +57,11 @@ HiveSortLimit(sort0=[$4], sort1=[$0], sort2=[$1], sort3=[$2], sort4=[$3], dir0=[ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_brand=[$8], i_class=[$10], i_category=[$12], i_product_name=[$21]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$3]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index bae58bd0e6..a3eed7bf91 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[358][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 17' is a cross product -Warning: Shuffle Join MERGEJOIN[360][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[360][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[362][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 13' is a cross product PREHOOK: query: explain cbo with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -145,13 +145,14 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_quantity=[$3], cs_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f1=[$0]) HiveFilter(condition=[>($2, 4)]) HiveProject(i_item_sk=[$1], d_date=[$0], $f2=[$2]) @@ -191,13 +192,14 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_customer_sk=[$2], ws_quantity=[$3], ws_list_price=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_customer_sk=[$4], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f1=[$0]) HiveFilter(condition=[>($2, 4)]) HiveProject(i_item_sk=[$1], d_date=[$0], $f2=[$2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out index 896495235d..8ac4287720 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[303][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -135,13 +135,14 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[IS NOT NULL($9)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ca_address_sk=[$0], ca_state=[$1], ca_zip=[$2], UPPER=[$3], s_store_sk=[$4], s_store_name=[$5], s_state=[$6], s_zip=[$7]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) HiveTableScan(table=[[default, item]], table:alias=[item]) @@ -164,13 +165,14 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[IS NOT NULL($9)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ca_address_sk=[$0], ca_state=[$1], ca_zip=[$2], UPPER=[$3], s_store_sk=[$4], s_store_name=[$5], s_state=[$6], s_zip=[$7]) + HiveJoin(condition=[=($7, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out index 0e8988986e..ebd5f5cc5d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query26.q.out @@ -57,19 +57,20 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_promo_sk=[$16], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N'))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$1], cs_item_sk=[$2], cs_promo_sk=[$3], cs_quantity=[$4], cs_list_price=[$5], cs_sales_price=[$6], cs_coupon_amt=[$7], cd_demo_sk=[$8], d_date_sk=[$9], p_promo_sk=[$10]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_cdemo_sk=[$4], cs_item_sk=[$15], cs_promo_sk=[$16], cs_quantity=[$18], cs_list_price=[$20], cs_sales_price=[$21], cs_coupon_amt=[$27]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N'))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out index 4ce4b8f143..bfaeed3952 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query27.q.out @@ -62,19 +62,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_store_sk=[$7], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'U'), =($3, _UTF-16LE'2 yr Degree'), =($1, _UTF-16LE'M'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveFilter(condition=[IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_cdemo_sk=[$2], ss_store_sk=[$3], ss_quantity=[$4], ss_list_price=[$5], ss_sales_price=[$6], ss_coupon_amt=[$7], cd_demo_sk=[$8], d_date_sk=[$9], s_store_sk=[$10], s_state=[$11]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_store_sk=[$7], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'U'), =($3, _UTF-16LE'2 yr Degree'), =($1, _UTF-16LE'M'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[IN($24, _UTF-16LE'SD', _UTF-16LE'FL', _UTF-16LE'MI', _UTF-16LE'LA', _UTF-16LE'MO', _UTF-16LE'SC')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out index ae81e139f3..a47af955a4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out @@ -89,13 +89,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[IS NOT NULL($8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0), IS NOT NULL($10))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$1], wr_returning_addr_sk=[$2], wr_return_amt=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) @@ -105,11 +106,12 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[IS NOT NULL($8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$1], wr_returning_addr_sk=[$2], wr_return_amt=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_returning_customer_sk=[$7], wr_returning_addr_sk=[$10], wr_return_amt=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($10))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out index ac1764a61b..39dc539f52 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out @@ -121,39 +121,42 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f3=[$1], >=[>($1, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$1], ws_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -163,37 +166,40 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(ca_county=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($4)]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_county=[$7]) HiveFilter(condition=[IS NOT NULL($7)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($10, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out index 673dc3da57..d94e5a4bfd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out @@ -182,13 +182,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_addr_sk=[$2], ss_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -205,13 +206,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$1], cs_item_sk=[$2], cs_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -228,11 +230,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_addr_sk=[$2], ws_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out index 1566387a20..ba7a0d2817 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out @@ -68,13 +68,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -82,13 +83,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -96,11 +98,12 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out index bb4ed738c6..88e8c3844c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out @@ -235,78 +235,87 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($7, $0), CASE($6, CASE($11, >(/($3, $10), /($1, $5)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f8=[$7]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f8=[$1], $f00=[$2], $f80=[$3], customer_id=[$4], year_total=[$5], CAST=[$6], $f000=[$7], $f800=[$8], customer_id0=[$9], year_total0=[$10], CAST0=[$11]) + HiveJoin(condition=[AND(=($7, $0), CASE($6, CASE($11, >(/($3, $10), /($1, $5)), false), false))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f8=[$7]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$1], customer_id=[$2], year_total=[$3], CAST=[$4], $f00=[$5], $f80=[$6], customer_id0=[$7], year_total0=[$8], CAST0=[$9]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f8=[$7]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) - HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[$2], $f0=[$3], $f8=[$4], customer_id0=[$5], year_total0=[$6], CAST0=[$7]) + HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f8=[$7]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$7], CAST=[CAST(IS NOT NULL($7)):BOOLEAN]) + HiveFilter(condition=[>($7, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3, 4, 5, 6, 7}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], /=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out index f71c0b57db..b5b10a8ee6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query40.q.out @@ -72,18 +72,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_state=[$10]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveJoin(condition=[=($11, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($3, $6), =($2, $5))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$14], cs_item_sk=[$15], cs_order_number=[$17], cs_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$1], cs_item_sk=[$2], cs_order_number=[$3], cs_sales_price=[$4], cr_item_sk=[$5], cr_order_number=[$6], cr_refunded_cash=[$7], d_date_sk=[$8], <=[$9], >==[$10], i_item_sk=[$11], i_item_id=[$12]) + HiveJoin(condition=[=($11, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($2, $5))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_warehouse_sk=[$14], cs_item_sk=[$15], cs_order_number=[$17], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_refunded_cash=[$23]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0], <=[<(CAST($2):DATE, 1998-04-08)], >==[>=(CAST($2):DATE, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out index d0a32f7aab..0ed2e120dc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query46.q.out @@ -95,19 +95,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], dir0=[ HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_city=[$6]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), IN($7, 6, 0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[OR(=($4, 1), =($3, 2))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_store_sk=[$4], ss_ticket_number=[$5], ss_coupon_amt=[$6], ss_net_profit=[$7], d_date_sk=[$8], s_store_sk=[$9], hd_demo_sk=[$10]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_coupon_amt=[$19], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(IN($6, 1998, 1999, 2000), IN($7, 6, 0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood', _UTF-16LE'Union', _UTF-16LE'Salem', _UTF-16LE'Highland Park')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[OR(=($4, 1), =($3, 2))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out index e71fd8778a..b6985657fe 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query47.q.out @@ -133,45 +133,46 @@ HiveProject(i_category=[$0], d_year=[$1], d_moy=[$2], avg_monthly_sales=[$3], su HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), =($14, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[+($5, 1)]) - HiveFilter(condition=[IS NOT NULL($5)]) - HiveProject((tok_table_or_col i_category)=[$5], (tok_table_or_col i_brand)=[$4], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $5, $4, $2, $3 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(d_year=[$0], d_moy=[$1], s_store_name=[$2], s_company_name=[$3], i_brand=[$4], i_category=[$5], $f6=[$6]) - HiveAggregate(group=[{5, 6, 8, 9, 11, 12}], agg#0=[sum($3)]) - HiveJoin(condition=[=($1, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) - HiveFilter(condition=[AND(=($4, 2000), >($7, 0:DECIMAL(1, 0)), CASE(>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($8))]) - HiveProject((tok_table_or_col i_category)=[$5], (tok_table_or_col i_brand)=[$4], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$0], (tok_table_or_col d_moy)=[$1], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $5, $4, $2, $3, $0 ORDER BY $5 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST, $3 NULLS FIRST, $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $5, $4, $2, $3 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(d_year=[$0], d_moy=[$1], s_store_name=[$2], s_company_name=[$3], i_brand=[$4], i_category=[$5], $f6=[$6]) - HiveAggregate(group=[{5, 6, 8, 9, 11, 12}], agg#0=[sum($3)]) - HiveJoin(condition=[=($1, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[$5], (tok_table_or_col i_category)0=[$6], (tok_table_or_col i_brand)0=[$7], (tok_table_or_col s_store_name)0=[$8], (tok_table_or_col s_company_name)0=[$9], (tok_table_or_col d_year)=[$10], (tok_table_or_col d_moy)=[$11], (tok_function sum (tok_table_or_col ss_sales_price))0=[$12], avg_window_0=[$13], rank_window_1=[$14]) + HiveJoin(condition=[AND(=($6, $0), =($7, $1), =($8, $2), =($9, $3), =($14, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$4], +=[+($5, 1)]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveProject((tok_table_or_col i_category)=[$5], (tok_table_or_col i_brand)=[$4], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], rank_window_1=[rank() OVER (PARTITION BY $5, $4, $2, $3 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(d_year=[$0], d_moy=[$1], s_store_name=[$2], s_company_name=[$3], i_brand=[$4], i_category=[$5], $f6=[$6]) + HiveAggregate(group=[{5, 6, 8, 9, 11, 12}], agg#0=[sum($3)]) + HiveJoin(condition=[=($1, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$4], (tok_table_or_col d_moy)=[$5], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[$7], rank_window_1=[$8]) + HiveFilter(condition=[AND(=($4, 2000), >($7, 0:DECIMAL(1, 0)), CASE(>($7, 0:DECIMAL(1, 0)), >(/(ABS(-($6, $7)), $7), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($8))]) + HiveProject((tok_table_or_col i_category)=[$5], (tok_table_or_col i_brand)=[$4], (tok_table_or_col s_store_name)=[$2], (tok_table_or_col s_company_name)=[$3], (tok_table_or_col d_year)=[$0], (tok_table_or_col d_moy)=[$1], (tok_function sum (tok_table_or_col ss_sales_price))=[$6], avg_window_0=[avg($6) OVER (PARTITION BY $5, $4, $2, $3, $0 ORDER BY $5 NULLS FIRST, $4 NULLS FIRST, $2 NULLS FIRST, $3 NULLS FIRST, $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $5, $4, $2, $3 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(d_year=[$0], d_moy=[$1], s_store_name=[$2], s_company_name=[$3], i_brand=[$4], i_category=[$5], $f6=[$6]) + HiveAggregate(group=[{5, 6, 8, 9, 11, 12}], agg#0=[sum($3)]) + HiveJoin(condition=[=($1, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_name=[$17]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($17))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out index 4be0b33782..a7a294c070 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out @@ -146,15 +146,17 @@ HiveAggregate(group=[{}], agg#0=[sum($8)]) HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0:DECIMAL(12, 2), 2000:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 3000:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 25000:DECIMAL(12, 2))]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], ss_sold_date_sk=[$1], ss_cdemo_sk=[$2], ss_addr_sk=[$3], ss_quantity=[$4], BETWEEN=[$5], BETWEEN6=[$6], BETWEEN7=[$7], d_date_sk=[$8]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$1], ss_addr_sk=[$2], ss_quantity=[$3], BETWEEN=[$4], BETWEEN6=[$5], BETWEEN7=[$6], d_date_sk=[$7]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0:DECIMAL(12, 2), 2000:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $22, 150:DECIMAL(12, 2), 3000:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $22, 50:DECIMAL(12, 2), 25000:DECIMAL(12, 2))]) + HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0)), BETWEEN(false, $13, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0)), BETWEEN(false, $13, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))), IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($7), OR(<=(100:DECIMAL(3, 0), $13), <=($13, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $13), <=($13, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $13), <=($13, 200:DECIMAL(3, 0))), OR(<=(0:DECIMAL(12, 2), $22), <=($22, 2000:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $22), <=($22, 3000:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $22), <=($22, 25000:DECIMAL(12, 2))))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out index b499ae96cf..ba35e1c333 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query49.q.out @@ -286,13 +286,14 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) HiveFilter(condition=[>($15, 10000:DECIMAL(5, 0))]) HiveTableScan(table=[[default, web_returns]], table:alias=[wr]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_net_paid=[$29], ws_net_profit=[$33]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_net_paid=[$4], ws_net_profit=[$5], d_date_sk=[$6]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_net_paid=[$29], ws_net_profit=[$33]) + HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'catalog':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) @@ -303,13 +304,14 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) HiveFilter(condition=[>($18, 10000:DECIMAL(5, 0))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[cr]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_net_paid=[$29], cs_net_profit=[$33]) - HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_net_paid=[$4], cs_net_profit=[$5], d_date_sk=[$6]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_net_paid=[$29], cs_net_profit=[$33]) + HiveFilter(condition=[AND(>($33, 1:DECIMAL(1, 0)), >($29, 0:DECIMAL(1, 0)), >($18, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], item=[$0], return_ratio=[$1], return_rank=[$2], currency_rank=[$3]) HiveFilter(condition=[OR(<=($2, 10), <=($3, 10))]) HiveProject(item=[$0], return_ratio=[/(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4))], rank_window_0=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($1):DECIMAL(15, 4), CAST($2):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY 0 ORDER BY /(CAST($3):DECIMAL(15, 4), CAST($4):DECIMAL(15, 4)) NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) @@ -320,11 +322,12 @@ HiveSortLimit(sort0=[$0], sort1=[$3], sort2=[$4], dir0=[ASC], dir1=[ASC], dir2=[ HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) HiveFilter(condition=[>($11, 10000:DECIMAL(5, 0))]) HiveTableScan(table=[[default, store_returns]], table:alias=[sr]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_net_paid=[$20], ss_net_profit=[$22]) - HiveFilter(condition=[AND(>($22, 1:DECIMAL(1, 0)), >($20, 0:DECIMAL(1, 0)), >($10, 0), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_net_paid=[$4], ss_net_profit=[$5], d_date_sk=[$6]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_net_paid=[$20], ss_net_profit=[$22]) + HiveFilter(condition=[AND(>($22, 1:DECIMAL(1, 0)), >($20, 0:DECIMAL(1, 0)), >($10, 0), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[sts]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out index 298f8e7eae..45f0cc0a87 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query5.q.out @@ -302,18 +302,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) - HiveUnion(all=[true]) - HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$18], net_loss=[$26]) - HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5], d_date_sk=[$6]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) + HiveUnion(all=[true]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[$23], profit=[$33], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(page_sk=[$12], date_sk=[$0], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$18], net_loss=[$26]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out index 908f1a3d11..eee9ed8c57 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query50.q.out @@ -134,13 +134,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3), IS NOT NULL($7))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_company_id=[$16], s_street_number=[$18], s_street_name=[$19], s_street_type=[$20], s_suite_number=[$21], s_city=[$22], s_county=[$23], s_state=[$24], s_zip=[$25]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out index ce41e9e2d2..ab46b6a4a1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[285][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[289][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain cbo with my_customers as ( select distinct c_customer_sk @@ -169,29 +169,31 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment'), =($12, _UTF-16LE'Jewelry'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_county=[$23], s_state=[$24]) - HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], d_date_sk=[$3], i_item_sk=[$4]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($10, _UTF-16LE'consignment'), =($12, _UTF-16LE'Jewelry'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2], $f00=[$3]) HiveJoin(condition=[<=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[<=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out index e94e9988b1..0a7292c4bd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out @@ -167,13 +167,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_addr_sk=[$2], ss_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -189,13 +190,14 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$1], cs_item_sk=[$2], cs_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -211,11 +213,12 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_addr_sk=[$2], ws_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out index b3ae965061..ac0d97b441 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query57.q.out @@ -127,45 +127,46 @@ HiveProject(i_category=[$0], i_brand=[$1], d_year=[$2], d_moy=[$3], avg_monthly_ HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(=($5, $0), =($6, $1), =($7, $2), =($12, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[+($4, 1)]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveProject((tok_table_or_col i_category)=[$4], (tok_table_or_col i_brand)=[$3], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $4, $3, $2 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(d_year=[$0], d_moy=[$1], cc_name=[$2], i_brand=[$3], i_category=[$4], $f5=[$5]) - HiveAggregate(group=[{5, 6, 8, 10, 11}], agg#0=[sum($3)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) - HiveFilter(condition=[IS NOT NULL($6)]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) - HiveFilter(condition=[AND(=($3, 2000), >($6, 0:DECIMAL(1, 0)), CASE(>($6, 0:DECIMAL(1, 0)), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($7))]) - HiveProject((tok_table_or_col i_category)=[$4], (tok_table_or_col i_brand)=[$3], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$0], (tok_table_or_col d_moy)=[$1], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $4, $3, $2, $0 ORDER BY $4 NULLS FIRST, $3 NULLS FIRST, $2 NULLS FIRST, $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $4, $3, $2 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) - HiveProject(d_year=[$0], d_moy=[$1], cc_name=[$2], i_brand=[$3], i_category=[$4], $f5=[$5]) - HiveAggregate(group=[{5, 6, 8, 10, 11}], agg#0=[sum($3)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) - HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) - HiveFilter(condition=[IS NOT NULL($6)]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[$4], (tok_table_or_col i_category)0=[$5], (tok_table_or_col i_brand)0=[$6], (tok_table_or_col cc_name)0=[$7], (tok_table_or_col d_year)=[$8], (tok_table_or_col d_moy)=[$9], (tok_function sum (tok_table_or_col cs_sales_price))0=[$10], avg_window_0=[$11], rank_window_1=[$12]) + HiveJoin(condition=[AND(=($5, $0), =($6, $1), =($7, $2), =($12, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$3], +=[+($4, 1)]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveProject((tok_table_or_col i_category)=[$4], (tok_table_or_col i_brand)=[$3], (tok_table_or_col cc_name)=[$2], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], rank_window_1=[rank() OVER (PARTITION BY $4, $3, $2 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(d_year=[$0], d_moy=[$1], cc_name=[$2], i_brand=[$3], i_category=[$4], $f5=[$5]) + HiveAggregate(group=[{5, 6, 8, 10, 11}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($6)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject((tok_table_or_col i_category)=[$0], (tok_table_or_col i_brand)=[$1], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$3], (tok_table_or_col d_moy)=[$4], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[$6], rank_window_1=[$7]) + HiveFilter(condition=[AND(=($3, 2000), >($6, 0:DECIMAL(1, 0)), CASE(>($6, 0:DECIMAL(1, 0)), >(/(ABS(-($5, $6)), $6), 0.1:DECIMAL(2, 1)), false), IS NOT NULL($7))]) + HiveProject((tok_table_or_col i_category)=[$4], (tok_table_or_col i_brand)=[$3], (tok_table_or_col cc_name)=[$2], (tok_table_or_col d_year)=[$0], (tok_table_or_col d_moy)=[$1], (tok_function sum (tok_table_or_col cs_sales_price))=[$5], avg_window_0=[avg($5) OVER (PARTITION BY $4, $3, $2, $0 ORDER BY $4 NULLS FIRST, $3 NULLS FIRST, $2 NULLS FIRST, $0 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], rank_window_1=[rank() OVER (PARTITION BY $4, $3, $2 ORDER BY $0 NULLS LAST, $1 NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)]) + HiveProject(d_year=[$0], d_moy=[$1], cc_name=[$2], i_brand=[$3], i_category=[$4], $f5=[$5]) + HiveAggregate(group=[{5, 6, 8, 10, 11}], agg#0=[sum($3)]) + HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_call_center_sk=[$11], cs_item_sk=[$15], cs_sales_price=[$21]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($11))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_year=[$6], d_moy=[$8]) + HiveFilter(condition=[AND(IN($6, 2000, 1999, 2001), OR(=($6, 2000), IN(ROW($6, $8), ROW(1999, 12), ROW(2001, 1))))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_name=[$6]) + HiveFilter(condition=[IS NOT NULL($6)]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) + HiveProject(i_item_sk=[$0], i_brand=[$8], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($8))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out index f26eae0acc..aee1f9bd52 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[407][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[413][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain cbo with ss_items as (select i_item_id item_id @@ -151,26 +151,28 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_ext_sales_price=[$23]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveAggregate(group=[{0}]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0], d_week_seq=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -180,26 +182,28 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveAggregate(group=[{0}]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0], d_week_seq=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(item_id=[$0], ws_item_rev=[$1], *=[*(0.9:DECIMAL(2, 1), $1)], *3=[*(1.1:DECIMAL(2, 1), $1)]) @@ -209,26 +213,28 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_sales_price=[$23]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveAggregate(group=[{0}]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0], d_week_seq=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, _UTF-16LE'1998-02-19')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out index 8d17cc79d1..b58a8e7b44 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out @@ -99,27 +99,10 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) - HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d]) - HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], d_week_seq=[$9]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) @@ -129,6 +112,25 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out index a25bb08942..921b6f21a7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[170][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain cbo select a.ca_state state, count(*) cnt from customer_address a @@ -88,15 +88,16 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[s]) - HiveJoin(condition=[AND(=($3, $2), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_category=[$12]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($12))]) - HiveTableScan(table=[[default, item]], table:alias=[i]) - HiveProject(i_category=[$0], *=[*(1.2:DECIMAL(2, 1), CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) - HiveFilter(condition=[IS NOT NULL(CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) - HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)]) - HiveFilter(condition=[IS NOT NULL($12)]) - HiveTableScan(table=[[default, item]], table:alias=[j]) + HiveProject(i_item_sk=[$0], i_current_price=[$1], i_category=[$2], i_category0=[$3], *=[$4]) + HiveJoin(condition=[AND(=($3, $2), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_category=[$12]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($12))]) + HiveTableScan(table=[[default, item]], table:alias=[i]) + HiveProject(i_category=[$0], *=[*(1.2:DECIMAL(2, 1), CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) + HiveFilter(condition=[IS NOT NULL(CAST(CAST(/($1, $2)):DECIMAL(11, 6)):DECIMAL(16, 6))]) + HiveAggregate(group=[{12}], agg#0=[sum($5)], agg#1=[count($5)]) + HiveFilter(condition=[IS NOT NULL($12)]) + HiveTableScan(table=[[default, item]], table:alias=[j]) HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_month_seq=[$3]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out index 51f2ad97c6..76c41b5033 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out @@ -187,13 +187,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_addr_sk=[$2], ss_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_addr_sk=[$6], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -209,13 +210,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$1], cs_item_sk=[$2], cs_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_addr_sk=[$6], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -231,11 +233,12 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_bill_addr_sk=[$2], ws_ext_sales_price=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_bill_addr_sk=[$7], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out index 486aef4f1c..77a87bfdc1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out @@ -283,30 +283,32 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveJoin(condition=[AND(=($9, $0), =($15, $1))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($7, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], c_first_shipto_date_sk=[$4], c_first_sales_date_sk=[$5], ss_sold_date_sk=[$6], ss_item_sk=[$7], ss_customer_sk=[$8], ss_cdemo_sk=[$9], ss_hdemo_sk=[$10], ss_addr_sk=[$11], ss_store_sk=[$12], ss_ticket_number=[$13], ss_wholesale_cost=[$14], ss_list_price=[$15], ss_coupon_amt=[$16], i_item_sk=[$17], i_product_name=[$18], d_date_sk=[$19], cs_item_sk=[$20]) + HiveJoin(condition=[=($7, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_wholesale_cost=[$8], ss_list_price=[$9], ss_coupon_amt=[$10], i_item_sk=[$11], i_product_name=[$12], d_date_sk=[$13]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cs_item_sk=[$0]) + HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) @@ -344,30 +346,32 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveJoin(condition=[AND(=($9, $0), =($15, $1))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($7, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], c_first_shipto_date_sk=[$4], c_first_sales_date_sk=[$5], ss_sold_date_sk=[$6], ss_item_sk=[$7], ss_customer_sk=[$8], ss_cdemo_sk=[$9], ss_hdemo_sk=[$10], ss_addr_sk=[$11], ss_store_sk=[$12], ss_ticket_number=[$13], ss_wholesale_cost=[$14], ss_list_price=[$15], ss_coupon_amt=[$16], i_item_sk=[$17], i_product_name=[$18], d_date_sk=[$19], cs_item_sk=[$20]) + HiveJoin(condition=[=($7, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_customer_sk=[$2], ss_cdemo_sk=[$3], ss_hdemo_sk=[$4], ss_addr_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_wholesale_cost=[$8], ss_list_price=[$9], ss_coupon_amt=[$10], i_item_sk=[$11], i_product_name=[$12], d_date_sk=[$13]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(BETWEEN(false, $5, 36:DECIMAL(2, 0), 45:DECIMAL(2, 0)), IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cs_item_sk=[$0]) + HiveFilter(condition=[>($1, *(2:DECIMAL(10, 0), $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out index af746307b7..b7dbf26845 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query65.q.out @@ -71,22 +71,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(s_store_sk=[$0], s_store_name=[$5]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2]) - HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], *=[*(0.1:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) - HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) - HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f00=[$3], *=[$4]) + HiveJoin(condition=[AND(=($3, $0), <=($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveProject(ss_store_sk=[$1], ss_item_sk=[$0], $f2=[$2]) HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) @@ -95,6 +84,18 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], *=[*(0.1:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) + HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)]) + HiveProject(ss_item_sk=[$0], ss_store_sk=[$1], $f2=[$2]) + HiveAggregate(group=[{1, 2}], agg#0=[sum($3)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_desc=[$4], i_current_price=[$5], i_wholesale_cost=[$6], i_brand=[$8]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out index 6e4ab9b627..81156dc7d4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query66.q.out @@ -467,21 +467,22 @@ HiveProject(w_warehouse_name=[$0], w_warehouse_sq_ft=[$1], w_city=[$2], w_county HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2], w_warehouse_sq_ft=[$3], w_city=[$8], w_county=[$9], w_state=[$10], w_country=[$12]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveJoin(condition=[=($2, $20)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$14], ws_warehouse_sk=[$15], *=[*($21, CAST($18):DECIMAL(10, 0))], *5=[*($30, CAST($18):DECIMAL(10, 0))]) - HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($14))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(t_time_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $2, 49530, 78330)]) - HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveProject(d_date_sk=[$0], ==[=($8, 1)], =2=[=($8, 2)], =3=[=($8, 3)], =4=[=($8, 4)], =5=[=($8, 5)], =6=[=($8, 6)], =7=[=($8, 7)], =8=[=($8, 8)], =9=[=($8, 9)], =10=[=($8, 10)], =11=[=($8, 11)], =12=[=($8, 12)]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(sm_ship_mode_sk=[$0]) - HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE')]) - HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$2], ws_warehouse_sk=[$3], *=[$4], *5=[$5], t_time_sk=[$6], d_date_sk=[$7], ==[$8], =2=[$9], =3=[$10], =4=[$11], =5=[$12], =6=[$13], =7=[$14], =8=[$15], =9=[$16], =10=[$17], =11=[$18], =12=[$19], sm_ship_mode_sk=[$20]) + HiveJoin(condition=[=($2, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_ship_mode_sk=[$14], ws_warehouse_sk=[$15], *=[*($21, CAST($18):DECIMAL(10, 0))], *5=[*($30, CAST($18):DECIMAL(10, 0))]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($14))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(t_time_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $2, 49530, 78330)]) + HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) + HiveProject(d_date_sk=[$0], ==[=($8, 1)], =2=[=($8, 2)], =3=[=($8, 3)], =4=[=($8, 4)], =5=[=($8, 5)], =6=[=($8, 6)], =7=[=($8, 7)], =8=[=($8, 8)], =9=[=($8, 9)], =10=[=($8, 10)], =11=[=($8, 11)], =12=[=($8, 12)]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(sm_ship_mode_sk=[$0]) + HiveFilter(condition=[IN($4, _UTF-16LE'DIAMOND', _UTF-16LE'AIRBORNE')]) + HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], $f9=[$9], $f10=[$10], $f11=[$11], $f12=[$12], $f13=[$13], $f14=[$14], $f15=[$15], $f16=[$16], $f17=[$17], $f18=[$18], $f19=[$19], $f20=[$20], $f21=[$21], $f22=[$22], $f23=[$23], $f24=[$24], $f25=[$25], $f26=[$26], $f27=[$27], $f28=[$28], $f29=[$29]) HiveAggregate(group=[{0, 1, 2, 3, 4, 5}], agg#0=[sum($6)], agg#1=[sum($7)], agg#2=[sum($8)], agg#3=[sum($9)], agg#4=[sum($10)], agg#5=[sum($11)], agg#6=[sum($12)], agg#7=[sum($13)], agg#8=[sum($14)], agg#9=[sum($15)], agg#10=[sum($16)], agg#11=[sum($17)], agg#12=[sum($18)], agg#13=[sum($19)], agg#14=[sum($20)], agg#15=[sum($21)], agg#16=[sum($22)], agg#17=[sum($23)], agg#18=[sum($24)], agg#19=[sum($25)], agg#20=[sum($26)], agg#21=[sum($27)], agg#22=[sum($28)], agg#23=[sum($29)]) HiveProject($f0=[$22], $f1=[$23], $f2=[$24], $f3=[$25], $f4=[$26], $f5=[$27], $f7=[CASE($8, $4, 0:DECIMAL(18, 2))], $f8=[CASE($9, $4, 0:DECIMAL(18, 2))], $f9=[CASE($10, $4, 0:DECIMAL(18, 2))], $f10=[CASE($11, $4, 0:DECIMAL(18, 2))], $f11=[CASE($12, $4, 0:DECIMAL(18, 2))], $f12=[CASE($13, $4, 0:DECIMAL(18, 2))], $f13=[CASE($14, $4, 0:DECIMAL(18, 2))], $f14=[CASE($15, $4, 0:DECIMAL(18, 2))], $f15=[CASE($16, $4, 0:DECIMAL(18, 2))], $f16=[CASE($17, $4, 0:DECIMAL(18, 2))], $f17=[CASE($18, $4, 0:DECIMAL(18, 2))], $f18=[CASE($19, $4, 0:DECIMAL(18, 2))], $f19=[CASE($8, $5, 0:DECIMAL(18, 2))], $f20=[CASE($9, $5, 0:DECIMAL(18, 2))], $f21=[CASE($10, $5, 0:DECIMAL(18, 2))], $f22=[CASE($11, $5, 0:DECIMAL(18, 2))], $f23=[CASE($12, $5, 0:DECIMAL(18, 2))], $f24=[CASE($13, $5, 0:DECIMAL(18, 2))], $f25=[CASE($14, $5, 0:DECIMAL(18, 2))], $f26=[CASE($15, $5, 0:DECIMAL(18, 2))], $f27=[CASE($16, $5, 0:DECIMAL(18, 2))], $f28=[CASE($17, $5, 0:DECIMAL(18, 2))], $f29=[CASE($18, $5, 0:DECIMAL(18, 2))], $f30=[CASE($19, $5, 0:DECIMAL(18, 2))]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out index c1605999a8..e81e5d1971 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query68.q.out @@ -109,19 +109,20 @@ HiveSortLimit(sort0=[$0], sort1=[$4], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], ca_city=[$6]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 1998, 1999, 2000))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood')]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[OR(=($4, 1), =($3, 2))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_hdemo_sk=[$2], ss_addr_sk=[$3], ss_store_sk=[$4], ss_ticket_number=[$5], ss_ext_sales_price=[$6], ss_ext_list_price=[$7], ss_ext_tax=[$8], d_date_sk=[$9], s_store_sk=[$10], hd_demo_sk=[$11]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_ext_list_price=[$17], ss_ext_tax=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($3), IS NOT NULL($5), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $9, 1, 2), IN($6, 1998, 1999, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IN($22, _UTF-16LE'Cedar Grove', _UTF-16LE'Wildwood')]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[OR(=($4, 1), =($3, 2))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out index 77580d817a..bc384b57d6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query7.q.out @@ -57,19 +57,20 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_promo_sk=[$8], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N'))]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_cdemo_sk=[$2], ss_promo_sk=[$3], ss_quantity=[$4], ss_list_price=[$5], ss_sales_price=[$6], ss_coupon_amt=[$7], cd_demo_sk=[$8], d_date_sk=[$9], p_promo_sk=[$10]) + HiveJoin(condition=[=($3, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_cdemo_sk=[$4], ss_promo_sk=[$8], ss_quantity=[$10], ss_list_price=[$12], ss_sales_price=[$13], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0), IS NOT NULL($8))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'W'), =($3, _UTF-16LE'Primary'), =($1, _UTF-16LE'F'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[OR(=($9, _UTF-16LE'N'), =($14, _UTF-16LE'N'))]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out index bcd25b3254..ee95b8235b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query71.q.out @@ -97,34 +97,35 @@ HiveProject(brand_id=[$0], brand=[$1], t_hour=[$2], t_minute=[$3], ext_price=[$4 HiveProject(t_time_sk=[$0], t_hour=[$3], t_minute=[$4]) HiveFilter(condition=[IN($9, _UTF-16LE'breakfast', _UTF-16LE'dinner')]) HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) - HiveFilter(condition=[=($20, 1)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2], i_item_sk=[$3], i_brand_id=[$4], i_brand=[$5]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ext_price=[$0], sold_item_sk=[$1], time_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_sold_time_sk=[$1], ws_item_sk=[$3], ws_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_sold_time_sk=[$1], cs_item_sk=[$15], cs_ext_sales_price=[$23]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ext_price=[$3], sold_item_sk=[$2], time_sk=[$1]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_sold_time_sk=[$1], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), =($8, 12))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_brand=[$8]) + HiveFilter(condition=[=($20, 1)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out index 346a3a9a46..9f20251a11 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out @@ -93,26 +93,27 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) HiveFilter(condition=[IS NOT NULL($3)]) HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($18), IS NOT NULL($5))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[=($2, _UTF-16LE'M')]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[=($2, _UTF-16LE'1001-5000')]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(d_date_sk=[$0], d_week_seq=[$1], d_date_sk0=[$2], d_week_seq0=[$3], +=[$4]) - HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], +=[+(CAST($2):DOUBLE, 5)]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4), IS NOT NULL(CAST($2):DOUBLE))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$1], cs_bill_cdemo_sk=[$2], cs_bill_hdemo_sk=[$3], cs_item_sk=[$4], cs_promo_sk=[$5], cs_order_number=[$6], cs_quantity=[$7], cd_demo_sk=[$8], hd_demo_sk=[$9], d_date_sk=[$10], d_week_seq=[$11], d_date_sk0=[$12], d_week_seq0=[$13], +=[$14]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_ship_date_sk=[$2], cs_bill_cdemo_sk=[$4], cs_bill_hdemo_sk=[$5], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($18), IS NOT NULL($5))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'M')]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'1001-5000')]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(d_date_sk=[$0], d_week_seq=[$1], d_date_sk0=[$2], d_week_seq0=[$3], +=[$4]) + HiveJoin(condition=[=($3, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], +=[+(CAST($2):DOUBLE, 5)]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4), IS NOT NULL(CAST($2):DOUBLE))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) HiveProject(p_promo_sk=[$0]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out index 6d983afc31..18ad7bb399 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out @@ -137,51 +137,57 @@ HiveSortLimit(sort0=[$1], sort1=[$0], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f4=[$3]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_net_paid=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject($f0=[$0], $f4=[$1], customer_id=[$2], year_total=[$3], CAST=[$4], $f00=[$5], $f40=[$6]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f4=[$3]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], ws_net_paid=[$2], d_date_sk=[$3]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0], $f4=[$3]) - HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) - HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) + HiveFilter(condition=[AND(=($6, 2002), IN($6, 2001, 2002))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(customer_id=[$0], year_total=[$1], CAST=[$2], $f0=[$3], $f4=[$4]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(customer_id=[$0], year_total=[$3], CAST=[CAST(IS NOT NULL($3)):BOOLEAN]) + HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], ws_net_paid=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f4=[$3]) + HiveFilter(condition=[>($3, 0:DECIMAL(1, 0))]) + HiveAggregate(group=[{1, 2, 3}], agg#0=[max($6)]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_net_paid=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2001), IN($6, 2001, 2002))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out index 82f65782a6..d47fb717fe 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query75.q.out @@ -173,47 +173,50 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2, 3}], agg#0=[sum($4)], agg#1=[sum($5)]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], i_manufact_id=[$3], sales_cnt=[$4], sales_amt=[$5]) @@ -228,45 +231,48 @@ HiveProject(prev_year=[CAST(2001):INTEGER], year=[CAST(2002):INTEGER], i_brand_i HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_quantity=[$17], cr_return_amount=[$18]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$1], cs_order_number=[$2], cs_quantity=[$3], cs_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_order_number=[$17], cs_quantity=[$18], cs_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_quantity=[$10], sr_return_amt=[$11]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ticket_number=[$2], ss_quantity=[$3], ss_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ticket_number=[$9], ss_quantity=[$10], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_brand_id=[$11], i_class_id=[$12], i_category_id=[$13], i_manufact_id=[$14], sales_cnt=[-($7, CASE(IS NOT NULL($2), $2, 0))], sales_amt=[-($8, CASE(IS NOT NULL($3), $3, 0:DECIMAL(1, 0)))]) HiveJoin(condition=[AND(=($6, $1), =($5, $0))], joinType=[right], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], ws_ext_sales_price=[$4], d_date_sk=[$5], i_item_sk=[$6], i_brand_id=[$7], i_class_id=[$8], i_category_id=[$9], i_manufact_id=[$10]) + HiveJoin(condition=[=($6, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], ws_ext_sales_price=[$23]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11], i_manufact_id=[$13]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Sports'), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out index fa6a0e8163..3f43b878cd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query80.q.out @@ -226,48 +226,52 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_net_profit=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_amt=[$11], sr_net_loss=[$19]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(i_item_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_store_sk=[$3], ss_promo_sk=[$4], ss_ticket_number=[$5], ss_ext_sales_price=[$6], ss_net_profit=[$7], sr_item_sk=[$8], sr_ticket_number=[$9], sr_return_amt=[$10], sr_net_loss=[$11], d_date_sk=[$12], p_promo_sk=[$13]) + HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_store_sk=[$2], ss_promo_sk=[$3], ss_ticket_number=[$4], ss_ext_sales_price=[$5], ss_net_profit=[$6], sr_item_sk=[$7], sr_ticket_number=[$8], sr_return_amt=[$9], sr_net_loss=[$10], d_date_sk=[$11]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_ext_sales_price=[$15], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($8), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9], sr_return_amt=[$11], sr_net_loss=[$19]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page', $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$1], $f1=[$8], $f2=[CASE(IS NOT NULL($12), $12, 0:DECIMAL(12, 2))], $f3=[-($9, CASE(IS NOT NULL($13), $13, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) - HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$12], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_ext_sales_price=[$23], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($12))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_amount=[$18], cr_net_loss=[$26]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(i_item_sk=[$0], cs_sold_date_sk=[$1], cs_catalog_page_sk=[$2], cs_item_sk=[$3], cs_promo_sk=[$4], cs_order_number=[$5], cs_ext_sales_price=[$6], cs_net_profit=[$7], cr_item_sk=[$8], cr_order_number=[$9], cr_return_amount=[$10], cr_net_loss=[$11], d_date_sk=[$12], p_promo_sk=[$13]) + HiveJoin(condition=[=($4, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$1], cs_item_sk=[$2], cs_promo_sk=[$3], cs_order_number=[$4], cs_ext_sales_price=[$5], cs_net_profit=[$6], cr_item_sk=[$7], cr_order_number=[$8], cr_return_amount=[$9], cr_net_loss=[$10], d_date_sk=[$11]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($2, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_catalog_page_sk=[$12], cs_item_sk=[$15], cs_promo_sk=[$16], cs_order_number=[$17], cs_ext_sales_price=[$23], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], cr_return_amount=[$18], cr_net_loss=[$26]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site', $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$7], $f2=[CASE(IS NOT NULL($11), $11, 0:DECIMAL(12, 2))], $f3=[-($8, CASE(IS NOT NULL($12), $12, 0:DECIMAL(12, 2)))]) @@ -276,20 +280,22 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[=($11, _UTF-16LE'N')]) HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_site_sk=[$13], ws_promo_sk=[$16], ws_order_number=[$17], ws_ext_sales_price=[$23], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($13), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], ws_sold_date_sk=[$1], ws_item_sk=[$2], ws_web_site_sk=[$3], ws_promo_sk=[$4], ws_order_number=[$5], ws_ext_sales_price=[$6], ws_net_profit=[$7], wr_item_sk=[$8], wr_order_number=[$9], wr_return_amt=[$10], wr_net_loss=[$11], d_date_sk=[$12]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_web_site_sk=[$2], ws_promo_sk=[$3], ws_order_number=[$4], ws_ext_sales_price=[$5], ws_net_profit=[$6], wr_item_sk=[$7], wr_order_number=[$8], wr_return_amt=[$9], wr_net_loss=[$10], d_date_sk=[$11]) + HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($4, $8))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_site_sk=[$13], ws_promo_sk=[$16], ws_order_number=[$17], ws_ext_sales_price=[$23], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($16), IS NOT NULL($13), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(wr_item_sk=[$2], wr_order_number=[$13], wr_return_amt=[$15], wr_net_loss=[$23]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out index 1b985b5748..305c38bbe6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out @@ -90,13 +90,14 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[IS NOT NULL($8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_returning_addr_sk=[$2], cr_return_amt_inc_tax=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($7), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(_o__c0=[*(CAST(/($1, $2)):DECIMAL(21, 6), 1.2:DECIMAL(2, 1))], ctr_state=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(21, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[count($2)]) @@ -106,11 +107,12 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam HiveProject(ca_address_sk=[$0], ca_state=[$8]) HiveFilter(condition=[IS NOT NULL($8)]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_returning_addr_sk=[$2], cr_return_amt_inc_tax=[$3], d_date_sk=[$4]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_returning_addr_sk=[$10], cr_return_amt_inc_tax=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out index fe05a6e300..22f4031ee9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -154,19 +154,20 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cr_returned_date_sk=[$0], cr_item_sk=[$2], cr_return_quantity=[$17]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -176,6 +177,30 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_return_quantity=[$10]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(item_id=[$0], wr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_return_quantity=[$14]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(d_date_sk=[$0], d_date=[$1], d_date0=[$2]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) @@ -189,28 +214,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(d_week_seq=[$4]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(item_id=[$0], wr_item_qty=[$1], CAST=[CAST($1):DOUBLE]) - HiveAggregate(group=[{7}], agg#0=[sum($2)]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_returned_date_sk=[$0], wr_item_sk=[$2], wr_return_quantity=[$14]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveAggregate(group=[{0}]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'1998-01-02', _UTF-16LE'1998-10-15', _UTF-16LE'1998-11-10'), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out index 34320030ff..d6226006a4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out @@ -188,28 +188,30 @@ HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveJoin(condition=[=($0, $26)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveTableScan(table=[[default, reason]], table:alias=[reason]) - HiveJoin(condition=[AND(=($1, $20), =($2, $25), OR(AND($14, $15, $7), AND($16, $17, $8), AND($18, $19, $9)), OR(AND($29, $4), AND($30, $5), AND($31, $6)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $33, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $33, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN8=[BETWEEN(false, $21, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN9=[BETWEEN(false, $21, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN10=[BETWEEN(false, $21, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) - HiveFilter(condition=[AND(OR(<=(100:DECIMAL(3, 0), $21), <=($21, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $21), <=($21, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $21), <=($21, 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(12, 2), $33), <=($33, 200:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $33), <=($33, 300:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $33), <=($33, 250:DECIMAL(12, 2))), IS NOT NULL($0), IS NOT NULL($12))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], cd_education_status=[$2], ==[$3], =4=[$4], =5=[$5], =6=[$6], =7=[$7], =8=[$8], wr_item_sk=[$9], wr_refunded_cdemo_sk=[$10], wr_refunded_addr_sk=[$11], wr_returning_cdemo_sk=[$12], wr_reason_sk=[$13], wr_order_number=[$14], wr_fee=[$15], wr_refunded_cash=[$16], ca_address_sk=[$17], IN=[$18], IN2=[$19], IN3=[$20], cd_demo_sk0=[$21], cd_marital_status0=[$22], cd_education_status0=[$23]) - HiveJoin(condition=[AND(=($1, $22), =($2, $23), =($0, $10))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($8), IS NOT NULL($4), IS NOT NULL($12))]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$1], ws_order_number=[$2], ws_quantity=[$3], BETWEEN=[$4], BETWEEN6=[$5], BETWEEN7=[$6], BETWEEN8=[$7], BETWEEN9=[$8], BETWEEN10=[$9], d_date_sk=[$10], cd_demo_sk=[$11], cd_marital_status=[$12], cd_education_status=[$13], ==[$14], =4=[$15], =5=[$16], =6=[$17], =7=[$18], =8=[$19], wr_item_sk=[$20], wr_refunded_cdemo_sk=[$21], wr_refunded_addr_sk=[$22], wr_returning_cdemo_sk=[$23], wr_reason_sk=[$24], wr_order_number=[$25], wr_fee=[$26], wr_refunded_cash=[$27], ca_address_sk=[$28], IN=[$29], IN2=[$30], IN3=[$31], cd_demo_sk0=[$32], cd_marital_status0=[$33], cd_education_status0=[$34]) + HiveJoin(condition=[AND(=($1, $20), =($2, $25), OR(AND($14, $15, $7), AND($16, $17, $8), AND($18, $19, $9)), OR(AND($29, $4), AND($30, $5), AND($31, $6)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100:DECIMAL(12, 2), 200:DECIMAL(12, 2))], BETWEEN6=[BETWEEN(false, $33, 150:DECIMAL(12, 2), 300:DECIMAL(12, 2))], BETWEEN7=[BETWEEN(false, $33, 50:DECIMAL(12, 2), 250:DECIMAL(12, 2))], BETWEEN8=[BETWEEN(false, $21, 100:DECIMAL(3, 0), 150:DECIMAL(3, 0))], BETWEEN9=[BETWEEN(false, $21, 50:DECIMAL(2, 0), 100:DECIMAL(3, 0))], BETWEEN10=[BETWEEN(false, $21, 150:DECIMAL(3, 0), 200:DECIMAL(3, 0))]) + HiveFilter(condition=[AND(OR(<=(100:DECIMAL(3, 0), $21), <=($21, 150:DECIMAL(3, 0)), <=(50:DECIMAL(2, 0), $21), <=($21, 100:DECIMAL(3, 0)), <=(150:DECIMAL(3, 0), $21), <=($21, 200:DECIMAL(3, 0))), OR(<=(100:DECIMAL(12, 2), $33), <=($33, 200:DECIMAL(12, 2)), <=(150:DECIMAL(12, 2), $33), <=($33, 300:DECIMAL(12, 2)), <=(50:DECIMAL(12, 2), $33), <=($33, 250:DECIMAL(12, 2))), IS NOT NULL($0), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], cd_education_status=[$2], ==[$3], =4=[$4], =5=[$5], =6=[$6], =7=[$7], =8=[$8], wr_item_sk=[$9], wr_refunded_cdemo_sk=[$10], wr_refunded_addr_sk=[$11], wr_returning_cdemo_sk=[$12], wr_reason_sk=[$13], wr_order_number=[$14], wr_fee=[$15], wr_refunded_cash=[$16], ca_address_sk=[$17], IN=[$18], IN2=[$19], IN3=[$20], cd_demo_sk0=[$21], cd_marital_status0=[$22], cd_education_status0=[$23]) + HiveJoin(condition=[AND(=($1, $22), =($2, $23), =($0, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveProject(wr_item_sk=[$0], wr_refunded_cdemo_sk=[$1], wr_refunded_addr_sk=[$2], wr_returning_cdemo_sk=[$3], wr_reason_sk=[$4], wr_order_number=[$5], wr_fee=[$6], wr_refunded_cash=[$7], ca_address_sk=[$8], IN=[$9], IN2=[$10], IN3=[$11], cd_demo_sk=[$12], cd_marital_status=[$13], cd_education_status=[$14]) + HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($8), IS NOT NULL($4), IS NOT NULL($12))]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out index e100934a10..313965cd6e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query87.q.out @@ -73,13 +73,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1:BIGINT], $f4=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -87,13 +88,14 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[$1], $f1=[$0], $f2=[$2], $f3=[1:BIGINT], $f4=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) HiveProject(c_first_name=[$0], c_last_name=[$1], d_date=[$2]) @@ -101,11 +103,12 @@ HiveAggregate(group=[{}], agg#0=[count()]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_first_name=[$8], c_last_name=[$9]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], d_date_sk=[$2], d_date=[$3]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out index e19d1ca301..b7b295ddc5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out @@ -86,13 +86,14 @@ HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss= HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -7:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], cd_demo_sk=[$4], cd_marital_status=[$5], cd_education_status=[$6]) + HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($2), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_call_center_sk=[$11], cr_net_loss=[$26]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out index 49023208dc..f8a6357a84 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out @@ -48,11 +48,12 @@ HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) - HiveFilter(condition=[IS NOT NULL($8)]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(r_reason_sk=[$0]) - HiveFilter(condition=[=($2, _UTF-16LE'Did not like the warranty')]) - HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveProject(sr_item_sk=[$0], sr_reason_sk=[$1], sr_ticket_number=[$2], sr_return_quantity=[$3], r_reason_sk=[$4]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[IS NOT NULL($8)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(r_reason_sk=[$0]) + HiveFilter(condition=[=($2, _UTF-16LE'Did not like the warranty')]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out index ef108a6578..ad13182c8a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out @@ -77,13 +77,14 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[su HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[=($8, _UTF-16LE'TX')]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_warehouse_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$7], d_date=[$8]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[=($14, _UTF-16LE'pri')]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out index ace074316b..7e6708d7ed 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out @@ -85,27 +85,30 @@ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[su HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_order_number=[$0]) - HiveAggregate(group=[{1}]) - HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) - HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveProject(ws_order_number=[$0], ca_address_sk=[$1], ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_order_number0=[$5], ws_ext_ship_cost=[$6], ws_net_profit=[$7], d_date_sk=[$8], d_date=[$9], web_site_sk=[$10]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0]) + HiveAggregate(group=[{1}]) + HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(web_site_sk=[$0]) - HiveFilter(condition=[=($14, _UTF-16LE'pri')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveProject(ca_address_sk=[$0], ws_ship_date_sk=[$1], ws_ship_addr_sk=[$2], ws_web_site_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$7], d_date=[$8], web_site_sk=[$9]) + HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$6], d_date=[$7]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0]) + HiveFilter(condition=[=($14, _UTF-16LE'pri')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out index 3df0e3dbe7..b32bebd1d3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out @@ -77,11 +77,12 @@ HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3 HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$1], ss_ext_sales_price=[$2], d_date_sk=[$3]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out index 1fcd766416..c5ee5df9cc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/mv_query44.q.out @@ -19,7 +19,7 @@ POSTHOOK: type: CREATE_MATERIALIZED_VIEW POSTHOOK: Input: default@store_sales POSTHOOK: Output: database:default POSTHOOK: Output: default@mv_store_sales_item_customer -Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -97,130 +97,130 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 10 <- Reducer 7 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 3 (SIMPLE_EDGE) +Reducer 9 <- Reducer 10 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=100 width=218) + Reducer 4 vectorized + File Output Operator [FS_150] + Limit [LIM_149] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_147] (rows=6951 width=218) + Select Operator [SEL_148] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=6951 width=218) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_71] + Select Operator [SEL_70] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_99] (rows=6951 width=218) - keys:_col1,top n:100 - Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) - Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + Top N Key Operator [TNK_100] (rows=6951 width=218) + keys:_col3,top n:100 + Merge Join Operator [MERGEJOIN_117] (rows=6951 width=218) + Conds:RS_67._col4=RS_120._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_144] (rows=462000 width=111) + Select Operator [SEL_118] (rows=462000 width=111) Output:["_col0","_col1"] - TableScan [TS_56] (rows=462000 width=111) + TableScan [TS_0] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_66] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115) - Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=115) + Conds:RS_119._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_144] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Please refer to the previous Select Operator [SEL_118] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_65] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12) - Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=12) + Conds:RS_142._col1=RS_147._col1(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] PartitionCols:_col1 - Select Operator [SEL_137] (rows=6951 width=8) + Select Operator [SEL_146] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_136] (rows=6951 width=116) + Filter Operator [FIL_145] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_135] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_134] (rows=20854 width=116) + PTF Operator [PTF_144] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_143] (rows=20854 width=116) Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_51] PartitionCols:0 - Top N Key Operator [TNK_100] (rows=20854 width=228) + Top N Key Operator [TNK_102] (rows=20854 width=228) keys:_col1,top n:11 - Filter Operator [FIL_20] (rows=20854 width=228) + Filter Operator [FIL_22] (rows=20854 width=228) predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Merge Join Operator [MERGEJOIN_113] (rows=62562 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_133] - Select Operator [SEL_132] (rows=1 width=112) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_137] + Select Operator [SEL_136] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_131] (rows=1 width=120) + Filter Operator [FIL_135] (rows=1 width=120) predicate:CAST( (_col1 / _col2) AS decimal(11,6)) is not null - Select Operator [SEL_130] (rows=1 width=120) + Select Operator [SEL_134] (rows=1 width=120) Output:["_col1","_col2"] - Group By Operator [GBY_129] (rows=1 width=124) + Group By Operator [GBY_133] (rows=1 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] PartitionCols:_col0 - Group By Operator [GBY_127] (rows=258 width=124) + Group By Operator [GBY_131] (rows=258 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_126] (rows=287946 width=114) + Select Operator [SEL_130] (rows=287946 width=114) Output:["_col1"] - Filter Operator [FIL_125] (rows=287946 width=114) + Filter Operator [FIL_129] (rows=287946 width=114) predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_8] (rows=575995635 width=114) + TableScan [TS_10] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - Select Operator [SEL_123] (rows=62562 width=116) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=62562 width=124) + Filter Operator [FIL_126] (rows=62562 width=124) predicate:CAST( (_col1 / _col2) AS decimal(11,6)) is not null - Group By Operator [GBY_121] (rows=62562 width=124) + Group By Operator [GBY_125] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Group By Operator [GBY_119] (rows=3199976 width=124) + Group By Operator [GBY_123] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_118] (rows=6399952 width=114) + Select Operator [SEL_122] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_117] (rows=6399952 width=114) + Filter Operator [FIL_121] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_0] (rows=575995635 width=114) + TableScan [TS_2] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] PartitionCols:_col1 - Select Operator [SEL_142] (rows=6951 width=8) + Select Operator [SEL_141] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_141] (rows=6951 width=116) + Filter Operator [FIL_140] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_140] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_139] (rows=20854 width=116) + PTF Operator [PTF_139] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_138] (rows=20854 width=116) Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_23] PartitionCols:0 Top N Key Operator [TNK_101] (rows=20854 width=228) keys:_col1,top n:11 - Please refer to the previous Filter Operator [FIL_20] + Please refer to the previous Filter Operator [FIL_22] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out index 46da1336d6..77c8e7740b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out @@ -59,125 +59,127 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_165] - Limit [LIM_164] (rows=100 width=100) + Reducer 3 vectorized + File Output Operator [FS_166] + Limit [LIM_165] (rows=100 width=100) Number of rows:100 - Select Operator [SEL_163] (rows=816091 width=100) + Select Operator [SEL_164] (rows=816091 width=100) Output:["_col0"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_51] - Select Operator [SEL_50] (rows=816091 width=100) + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_52] + Select Operator [SEL_51] (rows=816091 width=100) Output:["_col0"] - Top N Key Operator [TNK_79] (rows=816091 width=100) - keys:_col7,top n:100 - Merge Join Operator [MERGEJOIN_137] (rows=816091 width=100) - Conds:RS_47._col1=RS_162._col0(Inner),Output:["_col7"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + Top N Key Operator [TNK_80] (rows=816091 width=100) + keys:_col1,top n:100 + Merge Join Operator [MERGEJOIN_138] (rows=816091 width=100) + Conds:RS_140._col0=RS_49._col1(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_161] (rows=80000000 width=104) + Select Operator [SEL_139] (rows=80000000 width=104) Output:["_col0","_col1"] - TableScan [TS_38] (rows=80000000 width=104) + TableScan [TS_0] (rows=80000000 width=104) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:_col1 - Filter Operator [FIL_46] (rows=816091 width=225) - predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_136] (rows=2448274 width=225) - Conds:RS_43._col2=RS_160._col1(Inner),Output:["_col1","_col3","_col4"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] - PartitionCols:_col1 - Select Operator [SEL_159] (rows=31 width=115) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=31 width=123) - predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_157] (rows=31 width=123) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_47] (rows=816091 width=225) + Output:["_col1"] + Filter Operator [FIL_46] (rows=816091 width=225) + predicate:(_col3 > _col4) + Merge Join Operator [MERGEJOIN_137] (rows=2448274 width=225) + Conds:RS_43._col2=RS_163._col1(Inner),Output:["_col1","_col3","_col4"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col1 + Select Operator [SEL_162] (rows=31 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_161] (rows=31 width=123) + predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null + Group By Operator [GBY_160] (rows=31 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_159] (rows=14291868 width=119) + Output:["_col1","_col2"] + Group By Operator [GBY_158] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col0 + Group By Operator [GBY_30] (rows=17467258 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_136] (rows=17467258 width=107) + Conds:RS_149._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_150] (rows=652 width=8) + predicate:(d_year = 2000) + TableScan [TS_8] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] + PartitionCols:_col0 + Select Operator [SEL_147] (rows=53634860 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_145] (rows=53634860 width=119) + predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_5] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_135] (rows=2369298 width=114) + Conds:RS_143._col0=RS_157._col1(Inner),Output:["_col1","_col2","_col3"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + PartitionCols:_col0 + Select Operator [SEL_142] (rows=35 width=4) + Output:["_col0"] + Filter Operator [FIL_141] (rows=35 width=90) + predicate:(s_state = 'NM') + TableScan [TS_2] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_157] + PartitionCols:_col1 Select Operator [SEL_156] (rows=14291868 width=119) - Output:["_col1","_col2"] - Group By Operator [GBY_155] (rows=14291868 width=119) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_28] (rows=17467258 width=119) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_135] (rows=17467258 width=107) - Conds:RS_146._col0=RS_150._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] - PartitionCols:_col0 - Select Operator [SEL_148] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_147] (rows=652 width=8) - predicate:(d_year = 2000) - TableScan [TS_6] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - PartitionCols:_col0 - Select Operator [SEL_144] (rows=53634860 width=119) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_142] (rows=53634860 width=119) - predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_3] (rows=57591150 width=119) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_134] (rows=2369298 width=114) - Conds:RS_140._col0=RS_154._col1(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] - PartitionCols:_col0 - Select Operator [SEL_139] (rows=35 width=4) - Output:["_col0"] - Filter Operator [FIL_138] (rows=35 width=90) - predicate:(s_state = 'NM') - TableScan [TS_0] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] - PartitionCols:_col1 - Select Operator [SEL_153] (rows=14291868 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_152] (rows=14291868 width=119) - predicate:_col2 is not null - Group By Operator [GBY_151] (rows=14291868 width=119) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_14] - PartitionCols:_col0, _col1 - Group By Operator [GBY_13] (rows=16855704 width=119) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_133] (rows=16855704 width=107) - Conds:RS_145._col0=RS_149._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_148] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] - PartitionCols:_col0 - Select Operator [SEL_143] (rows=51757026 width=119) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_141] (rows=51757026 width=119) - predicate:(sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_155] (rows=14291868 width=119) + predicate:_col2 is not null + Group By Operator [GBY_154] (rows=14291868 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0, _col1 + Group By Operator [GBY_15] (rows=16855704 width=119) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_134] (rows=16855704 width=107) + Conds:RS_148._col0=RS_152._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_151] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + PartitionCols:_col0 + Select Operator [SEL_146] (rows=51757026 width=119) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_144] (rows=51757026 width=119) + predicate:(sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null) + Please refer to the previous TableScan [TS_5] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out index ecc69e3155..8847388c83 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out @@ -159,271 +159,271 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 24 (BROADCAST_EDGE) -Map 13 <- Reducer 23 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 9 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 25 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 21 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 16 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_350] - Limit [LIM_349] (rows=100 width=85) + Reducer 5 vectorized + File Output Operator [FS_356] + Limit [LIM_355] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_348] (rows=12248094 width=85) + Select Operator [SEL_354] (rows=12248094 width=85) Output:["_col0"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_89] - Select Operator [SEL_88] (rows=12248094 width=85) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_95] + Select Operator [SEL_94] (rows=12248094 width=85) Output:["_col0"] - Top N Key Operator [TNK_154] (rows=12248094 width=537) - keys:_col8,top n:100 - Filter Operator [FIL_87] (rows=12248094 width=537) - predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col2) THEN (((_col6 / _col1) > (_col9 / _col4))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_284] (rows=24496188 width=537) - Conds:RS_84._col3=RS_347._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col8","_col9"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + Top N Key Operator [TNK_160] (rows=12248094 width=537) + keys:_col1,top n:100 + Filter Operator [FIL_93] (rows=12248094 width=537) + predicate:CASE WHEN (_col9 is not null) THEN (CASE WHEN (_col7) THEN (((_col4 / _col6) > (_col2 / _col9))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_290] (rows=24496188 width=537) + Conds:RS_325._col0=RS_91._col5(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col9"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] PartitionCols:_col0 - Select Operator [SEL_346] (rows=80000000 width=297) + Select Operator [SEL_324] (rows=80000000 width=297) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_345] (rows=80000000 width=764) + Group By Operator [GBY_323] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_75] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_74] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 + Group By Operator [GBY_16] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=764) - Conds:RS_70._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] + Conds:RS_292._col0=RS_13._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_312] (rows=80000000 width=656) + Select Operator [SEL_291] (rows=80000000 width=656) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - TableScan [TS_65] (rows=80000000 width=656) + TableScan [TS_0] (rows=80000000 width=656) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=115) - Conds:RS_344._col0=RS_291._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_291] + Conds:RS_322._col0=RS_302._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_302] PartitionCols:_col0 - Select Operator [SEL_288] (rows=652 width=4) + Select Operator [SEL_299] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_285] (rows=652 width=8) + Filter Operator [FIL_296] (rows=652 width=8) predicate:(d_year = 2002) - TableScan [TS_62] (rows=73049 width=8) + TableScan [TS_5] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] PartitionCols:_col0 - Select Operator [SEL_343] (rows=525327388 width=119) + Select Operator [SEL_321] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=525327388 width=221) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_59] (rows=575995635 width=221) + Filter Operator [FIL_320] (rows=525327388 width=221) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=575995635 width=221) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_319] + Group By Operator [GBY_318] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] - Group By Operator [GBY_299] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_314] + Group By Operator [GBY_310] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_292] (rows=652 width=4) + Select Operator [SEL_303] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_84] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_283] (rows=20485012 width=440) - Conds:RS_81._col3=RS_339._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] - PartitionCols:_col0 - Select Operator [SEL_338] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_337] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_55] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=764) - Conds:RS_51._col1=RS_314._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) - Conds:RS_336._col0=RS_293._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_293] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_288] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] - PartitionCols:_col0 - Select Operator [SEL_335] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_334] (rows=143930993 width=231) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=144002668 width=231) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_294] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_81] + Please refer to the previous Select Operator [SEL_299] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_289] (rows=20485012 width=440) + Conds:RS_333._col0=RS_87._col3(Inner),Output:["_col1","_col3","_col4","_col5","_col6"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_87] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_282] (rows=17130654 width=328) - Conds:RS_321._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + Merge Join Operator [MERGEJOIN_288] (rows=17130654 width=328) + Conds:RS_343._col0=RS_353._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] PartitionCols:_col0 - Select Operator [SEL_330] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=26666666 width=212) + Select Operator [SEL_342] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_341] (rows=17130654 width=212) predicate:(_col7 > 0) - Select Operator [SEL_328] (rows=80000000 width=212) + Select Operator [SEL_340] (rows=51391963 width=212) Output:["_col0","_col7"] - Group By Operator [GBY_327] (rows=80000000 width=764) + Group By Operator [GBY_339] (rows=51391963 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_36] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_57] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_35] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_277] (rows=187573258 width=764) - Conds:RS_31._col1=RS_316._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + Group By Operator [GBY_56] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_285] (rows=51391963 width=764) + Conds:RS_294._col0=RS_53._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_31] + Please refer to the previous Select Operator [SEL_291] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=187573258 width=115) - Conds:RS_326._col0=RS_297._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] + Merge Join Operator [MERGEJOIN_284] (rows=51391963 width=115) + Conds:RS_338._col0=RS_306._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] PartitionCols:_col0 - Select Operator [SEL_290] (rows=652 width=4) + Select Operator [SEL_300] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_287] (rows=652 width=8) + Filter Operator [FIL_297] (rows=652 width=8) predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_62] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + Please refer to the previous TableScan [TS_5] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] PartitionCols:_col0 - Select Operator [SEL_325] (rows=525327388 width=119) + Select Operator [SEL_337] (rows=143930993 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_324] (rows=525327388 width=221) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=221) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_323] - Group By Operator [GBY_322] (rows=1 width=12) + Filter Operator [FIL_336] (rows=143930993 width=231) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_42] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_335] + Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_302] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_316] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=652 width=4) + Select Operator [SEL_307] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Please refer to the previous Select Operator [SEL_300] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] PartitionCols:_col0 - Select Operator [SEL_320] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_319] (rows=17130654 width=212) + Select Operator [SEL_352] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_351] (rows=26666666 width=212) predicate:(_col7 > 0) - Select Operator [SEL_318] (rows=51391963 width=212) + Select Operator [SEL_350] (rows=80000000 width=212) Output:["_col0","_col7"] - Group By Operator [GBY_317] (rows=51391963 width=764) + Group By Operator [GBY_349] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_15] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_275] (rows=51391963 width=764) - Conds:RS_11._col1=RS_315._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Group By Operator [GBY_77] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_287] (rows=187573258 width=764) + Conds:RS_295._col0=RS_74._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] + Please refer to the previous Select Operator [SEL_291] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_274] (rows=51391963 width=115) - Conds:RS_311._col0=RS_295._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] + Merge Join Operator [MERGEJOIN_286] (rows=187573258 width=115) + Conds:RS_348._col0=RS_308._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_308] PartitionCols:_col0 - Select Operator [SEL_289] (rows=652 width=4) + Select Operator [SEL_301] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_286] (rows=652 width=8) + Filter Operator [FIL_298] (rows=652 width=8) predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_62] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] + Please refer to the previous TableScan [TS_5] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_348] PartitionCols:_col0 - Select Operator [SEL_310] (rows=143930993 width=119) + Select Operator [SEL_347] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=143930993 width=231) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=231) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) + Filter Operator [FIL_346] (rows=525327388 width=221) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_63] (rows=575995635 width=221) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_345] + Group By Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_317] + Group By Operator [GBY_313] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=652 width=4) + Select Operator [SEL_309] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] + Please refer to the previous Select Operator [SEL_301] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + PartitionCols:_col0 + Select Operator [SEL_332] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_331] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_36] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=764) + Conds:RS_293._col0=RS_33._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_291] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_282] (rows=51391963 width=115) + Conds:RS_330._col0=RS_304._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_299] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] + PartitionCols:_col0 + Select Operator [SEL_329] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_328] (rows=143930993 width=231) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_22] (rows=144002668 width=231) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_327] + Group By Operator [GBY_326] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] + Group By Operator [GBY_311] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_305] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_299] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out index 86e17ea904..ab0799aa12 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query12.q.out @@ -73,87 +73,87 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_83] - Limit [LIM_82] (rows=100 width=802) + Reducer 5 vectorized + File Output Operator [FS_84] + Limit [LIM_83] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_81] (rows=138600 width=801) + Select Operator [SEL_82] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_80] - Select Operator [SEL_79] (rows=138600 width=801) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + Select Operator [SEL_80] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_78] (rows=138600 width=689) + Top N Key Operator [TNK_79] (rows=138600 width=689) keys:_col0, _col1, _col2, _col3, ((_col5 * 100) / sum_window_0),top n:100 - PTF Operator [PTF_77] (rows=138600 width=689) + PTF Operator [PTF_78] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_76] (rows=138600 width=689) + Select Operator [SEL_77] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_75] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] PartitionCols:_col1 - Group By Operator [GBY_74] (rows=138600 width=689) + Group By Operator [GBY_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=138600 width=689) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_59] (rows=4798568 width=689) - Conds:RS_12._col1=RS_73._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + Group By Operator [GBY_17] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)"],keys:_col5, _col4, _col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_60] (rows=4798568 width=689) + Conds:RS_63._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_72] (rows=138600 width=581) + Select Operator [SEL_62] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_71] (rows=138600 width=581) + Filter Operator [FIL_61] (rows=138600 width=581) predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') - TableScan [TS_6] (rows=462000 width=581) + TableScan [TS_0] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_58] (rows=15995224 width=115) - Conds:RS_70._col0=RS_62._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_62] + Merge Join Operator [MERGEJOIN_59] (rows=15995224 width=115) + Conds:RS_74._col0=RS_66._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col0 - Select Operator [SEL_61] (rows=8116 width=4) + Select Operator [SEL_65] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_60] (rows=8116 width=98) + Filter Operator [FIL_64] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_70] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_69] (rows=143966864 width=119) + Select Operator [SEL_73] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_68] (rows=143966864 width=119) + Filter Operator [FIL_72] (rows=143966864 width=119) predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + TableScan [TS_3] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_67] - Group By Operator [GBY_66] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_65] - Group By Operator [GBY_64] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_63] (rows=8116 width=4) + Select Operator [SEL_67] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_61] + Please refer to the previous Select Operator [SEL_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out index c6274c0574..6973d82f8e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out @@ -115,105 +115,107 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Map 7 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_120] - Select Operator [SEL_119] (rows=1 width=344) + Reducer 4 vectorized + File Output Operator [FS_122] + Select Operator [SEL_121] (rows=1 width=344) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_118] (rows=1 width=256) + Group By Operator [GBY_120] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Group By Operator [GBY_30] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col6)","count(_col6)"] - Select Operator [SEL_29] (rows=368553 width=44) - Output:["_col4","_col5","_col6"] - Filter Operator [FIL_28] (rows=368553 width=44) - predicate:((_col19 and _col20 and _col10 and _col26) or (_col21 and _col22 and _col11 and _col27) or (_col23 and _col24 and _col12 and _col27)) - Merge Join Operator [MERGEJOIN_97] (rows=1965626 width=44) - Conds:RS_25._col2=RS_117._col0(Inner),Output:["_col4","_col5","_col6","_col10","_col11","_col12","_col19","_col20","_col21","_col22","_col23","_col24","_col26","_col27"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col17)","count(_col17)"] + Select Operator [SEL_31] (rows=368553 width=44) + Output:["_col15","_col16","_col17"] + Filter Operator [FIL_30] (rows=368553 width=44) + predicate:((_col1 and _col2 and _col21 and _col26) or (_col3 and _col4 and _col22 and _col27) or (_col5 and _col6 and _col23 and _col27)) + Merge Join Operator [MERGEJOIN_99] (rows=1965626 width=44) + Conds:RS_27._col13=RS_119._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col15","_col16","_col17","_col21","_col22","_col23","_col26","_col27"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_116] (rows=1309 width=12) + Select Operator [SEL_118] (rows=1309 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_115] (rows=1309 width=8) + Filter Operator [FIL_117] (rows=1309 width=8) predicate:(hd_dep_count) IN (3, 1) - TableScan [TS_12] (rows=7200 width=8) + TableScan [TS_21] (rows=7200 width=8) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_96] (rows=10811694 width=36) - Conds:RS_22._col1=RS_114._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col10","_col11","_col12","_col19","_col20","_col21","_col22","_col23","_col24"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_98] (rows=10811694 width=36) + Conds:RS_102._col0=RS_25._col5(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col13","_col15","_col16","_col17","_col21","_col22","_col23"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_113] (rows=265971 width=28) + Select Operator [SEL_101] (rows=265971 width=28) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_112] (rows=265971 width=183) + Filter Operator [FIL_100] (rows=265971 width=183) predicate:((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) - TableScan [TS_9] (rows=1861800 width=183) + TableScan [TS_0] (rows=1861800 width=183) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col1 - Filter Operator [FIL_21] (rows=10811694 width=36) - predicate:((_col15 and _col7) or (_col16 and _col8) or (_col17 and _col9)) - Merge Join Operator [MERGEJOIN_95] (rows=14415593 width=36) - Conds:RS_18._col3=RS_111._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] - PartitionCols:_col0 - Select Operator [SEL_110] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_109] (rows=3529412 width=187) - predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) - TableScan [TS_6] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_94] (rows=163376714 width=233) - Conds:RS_108._col0=RS_100._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_100] - PartitionCols:_col0 - Select Operator [SEL_99] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_98] (rows=652 width=8) - predicate:(d_year = 2001) - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_108] - PartitionCols:_col0 - Select Operator [SEL_107] (rows=457561292 width=257) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_106] (rows=457561292 width=450) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250)) and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=450) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_105] - Group By Operator [GBY_104] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] - Group By Operator [GBY_102] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_101] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_99] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col5 + Select Operator [SEL_20] (rows=10811694 width=36) + Output:["_col5","_col6","_col8","_col9","_col10","_col14","_col15","_col16"] + Filter Operator [FIL_19] (rows=10811694 width=36) + predicate:((_col1 and _col11) or (_col2 and _col12) or (_col3 and _col13)) + Merge Join Operator [MERGEJOIN_97] (rows=14415593 width=36) + Conds:RS_105._col0=RS_17._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_104] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_103] (rows=3529412 width=187) + predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) + TableScan [TS_3] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_96] (rows=163376714 width=233) + Conds:RS_116._col0=RS_108._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + PartitionCols:_col0 + Select Operator [SEL_107] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_106] (rows=652 width=8) + predicate:(d_year = 2001) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + PartitionCols:_col0 + Select Operator [SEL_115] (rows=457561292 width=257) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Filter Operator [FIL_114] (rows=457561292 width=450) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250)) and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_6] (rows=575995635 width=450) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_109] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_107] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out index 8204245245..63cac8f9f3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1203][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1210][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1202][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[1217][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 21' is a cross product +Warning: Shuffle Join MERGEJOIN[1228][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -222,840 +222,840 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Map 24 <- Reducer 40 (BROADCAST_EDGE) -Map 63 <- Reducer 46 (BROADCAST_EDGE) -Map 64 <- Reducer 52 (BROADCAST_EDGE) -Map 66 <- Reducer 56 (BROADCAST_EDGE) +Map 38 <- Reducer 41 (BROADCAST_EDGE) +Map 56 <- Reducer 43 (BROADCAST_EDGE) +Map 57 <- Reducer 45 (BROADCAST_EDGE) +Map 58 <- Reducer 61 (BROADCAST_EDGE) +Map 66 <- Reducer 49 (BROADCAST_EDGE) Map 67 <- Reducer 72 (BROADCAST_EDGE) Map 73 <- Reducer 78 (BROADCAST_EDGE) -Map 79 <- Reducer 17 (BROADCAST_EDGE) -Map 80 <- Reducer 23 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 79 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 14 <- Map 65 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 59 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 17 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 10 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Map 65 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE), Reducer 62 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 23 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) -Reducer 26 <- Map 65 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Union 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 30 <- Map 65 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Reducer 26 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 33 <- Union 32 (SIMPLE_EDGE) -Reducer 34 <- Map 65 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 35 <- Reducer 26 (SIMPLE_EDGE), Union 36 (CONTAINS) -Reducer 37 <- Union 36 (SIMPLE_EDGE) -Reducer 38 <- Map 65 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE) -Reducer 4 <- Map 65 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 63 (SIMPLE_EDGE) -Reducer 42 <- Map 65 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 43 <- Reducer 42 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 44 <- Reducer 42 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 45 <- Reducer 42 (SIMPLE_EDGE), Union 36 (CONTAINS) -Reducer 46 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 47 <- Map 39 (SIMPLE_EDGE), Map 64 (SIMPLE_EDGE) -Reducer 48 <- Map 65 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE) -Reducer 49 <- Reducer 48 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Reducer 48 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 51 <- Reducer 48 (SIMPLE_EDGE), Union 36 (CONTAINS) -Reducer 52 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 53 <- Map 39 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 54 (CONTAINS) +Map 79 <- Reducer 63 (BROADCAST_EDGE) +Map 80 <- Reducer 65 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 10 (SIMPLE_EDGE), Union 15 (CONTAINS) +Reducer 16 <- Union 15 (SIMPLE_EDGE) +Reducer 17 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 62 (SIMPLE_EDGE) +Reducer 19 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE), Reducer 52 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 22 <- Reducer 10 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 24 <- Union 23 (SIMPLE_EDGE) +Reducer 25 <- Map 1 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 64 (SIMPLE_EDGE) +Reducer 27 <- Map 1 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 55 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 1 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 32 <- Reducer 30 (SIMPLE_EDGE), Union 15 (CONTAINS) +Reducer 33 <- Reducer 30 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 34 <- Map 1 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 35 <- Reducer 34 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 36 <- Reducer 34 (SIMPLE_EDGE), Union 15 (CONTAINS) +Reducer 37 <- Reducer 34 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 48 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 41 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 42 <- Map 40 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 43 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 40 (SIMPLE_EDGE), Map 57 (SIMPLE_EDGE) +Reducer 45 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 46 <- Map 40 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 47 (CONTAINS) +Reducer 48 <- Union 47 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 50 <- Map 40 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 51 (CONTAINS) +Reducer 52 <- Union 51 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 40 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 54 (CONTAINS) Reducer 55 <- Union 54 (CUSTOM_SIMPLE_EDGE) -Reducer 56 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 57 <- Map 39 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 58 (CONTAINS) -Reducer 59 <- Union 58 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 55 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 60 <- Map 39 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 61 (CONTAINS) -Reducer 62 <- Union 61 (CUSTOM_SIMPLE_EDGE) -Reducer 68 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 54 (CONTAINS) -Reducer 69 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 58 (CONTAINS) -Reducer 70 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 61 (CONTAINS) +Reducer 59 <- Map 58 (SIMPLE_EDGE), Map 60 (SIMPLE_EDGE) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 61 <- Map 60 (CUSTOM_SIMPLE_EDGE) +Reducer 62 <- Map 60 (SIMPLE_EDGE), Map 79 (SIMPLE_EDGE) +Reducer 63 <- Map 60 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 60 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE) +Reducer 65 <- Map 60 (CUSTOM_SIMPLE_EDGE) +Reducer 68 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 47 (CONTAINS) +Reducer 69 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 51 (CONTAINS) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 70 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 54 (CONTAINS) Reducer 72 <- Map 71 (CUSTOM_SIMPLE_EDGE) -Reducer 74 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 54 (CONTAINS) -Reducer 75 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 58 (CONTAINS) -Reducer 76 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 61 (CONTAINS) +Reducer 74 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 47 (CONTAINS) +Reducer 75 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 51 (CONTAINS) +Reducer 76 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 54 (CONTAINS) Reducer 78 <- Map 77 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 9 <- Reducer 59 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_1366] - Limit [LIM_1365] (rows=100 width=223) + Reducer 7 vectorized + File Output Operator [FS_1372] + Limit [LIM_1371] (rows=100 width=223) Number of rows:100 - Select Operator [SEL_1364] (rows=304320 width=222) + Select Operator [SEL_1370] (rows=304320 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1363] - Select Operator [SEL_1362] (rows=304320 width=222) + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1369] + Select Operator [SEL_1368] (rows=304320 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_1361] (rows=304320 width=230) + Group By Operator [GBY_1367] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_1209] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 21 [CONTAINS] + Reduce Output Operator [RS_1223] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1208] (rows=304320 width=230) + Group By Operator [GBY_1222] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1207] (rows=121728 width=220) + Top N Key Operator [TNK_1221] (rows=121728 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1205] (rows=40576 width=222) + Select Operator [SEL_1219] (rows=40576 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1204] (rows=40576 width=243) + Filter Operator [FIL_1218] (rows=40576 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1203] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1217] (rows=121728 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1378] - Filter Operator [FIL_1377] (rows=121728 width=131) + <-Reducer 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1415] + Filter Operator [FIL_1414] (rows=121728 width=131) predicate:_col3 is not null - Group By Operator [GBY_1376] (rows=121728 width=131) + Group By Operator [GBY_1413] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_238] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_242] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_237] (rows=486912 width=131) + Group By Operator [GBY_241] (rows=486912 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_235] (rows=7790806 width=106) + Select Operator [SEL_239] (rows=7790806 width=106) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1176] (rows=7790806 width=106) - Conds:RS_232._col1=RS_1344._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1344] + Merge Join Operator [MERGEJOIN_1182] (rows=7790806 width=106) + Conds:RS_1327._col0=RS_237._col2(Inner),Output:["_col1","_col2","_col3","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1327] PartitionCols:_col0 - Select Operator [SEL_1335] (rows=462000 width=15) + Select Operator [SEL_1318] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_81] (rows=462000 width=15) + TableScan [TS_0] (rows=462000 width=15) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_232] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1175] (rows=7790806 width=98) - Conds:RS_229._col1=RS_230._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_229] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1167] (rows=7790806 width=98) - Conds:RS_1371._col0=RS_1311._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1311] - PartitionCols:_col0 - Select Operator [SEL_1308] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_1307] (rows=50 width=12) - predicate:((d_year = 2000) and (d_moy = 11)) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 79 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1371] - PartitionCols:_col0 - Select Operator [SEL_1370] (rows=286549727 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1369] (rows=286549727 width=123) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_227_date_dim_d_date_sk_min) AND DynamicValue(RS_227_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_227_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_143] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1368] - Group By Operator [GBY_1367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1319] - Group By Operator [GBY_1316] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1312] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1308] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_230] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_237] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_1181] (rows=7790806 width=98) + Conds:RS_232._col0=RS_233._col1(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_232] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1174] (rows=724 width=4) - Conds:RS_1352._col1, _col2, _col3=RS_1375._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1352] + Merge Join Operator [MERGEJOIN_1179] (rows=724 width=4) + Conds:RS_1335._col1, _col2, _col3=RS_1407._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1335] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1345] (rows=458612 width=15) + Select Operator [SEL_1328] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1336] (rows=458612 width=15) + Filter Operator [FIL_1319] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1375] + Please refer to the previous TableScan [TS_0] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1407] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1374] (rows=1 width=12) + Select Operator [SEL_1406] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1373] (rows=1 width=20) + Filter Operator [FIL_1405] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1372] (rows=121728 width=19) + Group By Operator [GBY_1404] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 32 [SIMPLE_EDGE] - <-Reducer 31 [CONTAINS] vectorized - Reduce Output Operator [RS_1429] + <-Union 15 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] vectorized + Reduce Output Operator [RS_1403] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1428] (rows=121728 width=19) + Group By Operator [GBY_1402] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1427] (rows=121728 width=19) + Group By Operator [GBY_1401] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_169] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_167] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_25] (rows=121728 width=19) + Group By Operator [GBY_21] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1156] (rows=14628613 width=11) - Conds:RS_21._col1=RS_1349._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1349] + Merge Join Operator [MERGEJOIN_1161] (rows=14628613 width=11) + Conds:RS_17._col1=RS_1332._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1332] PartitionCols:_col0 - Select Operator [SEL_1341] (rows=458612 width=15) + Select Operator [SEL_1324] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1332] (rows=458612 width=15) + Filter Operator [FIL_1315] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_21] + Please refer to the previous TableScan [TS_0] + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1155] (rows=14736682 width=4) - Conds:RS_1423._col0=RS_1401._col0(Inner),Output:["_col1"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1401] + Merge Join Operator [MERGEJOIN_1160] (rows=14736682 width=4) + Conds:RS_1397._col0=RS_1375._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1375] PartitionCols:_col0 - Select Operator [SEL_1400] (rows=1957 width=4) + Select Operator [SEL_1374] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1399] (rows=1957 width=8) + Filter Operator [FIL_1373] (rows=1957 width=8) predicate:d_year BETWEEN 1999 AND 2001 - TableScan [TS_12] (rows=73049 width=8) + TableScan [TS_8] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1423] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1397] PartitionCols:_col0 - Select Operator [SEL_1422] (rows=550076554 width=7) + Select Operator [SEL_1396] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1421] (rows=550076554 width=7) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_d1_d_date_sk_min) AND DynamicValue(RS_19_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_d1_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=7) + Filter Operator [FIL_1395] (rows=550076554 width=7) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_15_d1_d_date_sk_min) AND DynamicValue(RS_15_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_d1_d_date_sk_bloom_filter))) + TableScan [TS_5] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Reducer 40 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1420] - Group By Operator [GBY_1419] (rows=1 width=12) + <-Reducer 41 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1394] + Group By Operator [GBY_1393] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1415] - Group By Operator [GBY_1411] (rows=1 width=12) + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1389] + Group By Operator [GBY_1385] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1402] (rows=1957 width=4) + Select Operator [SEL_1376] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 44 [CONTAINS] vectorized - Reduce Output Operator [RS_1443] + Please refer to the previous Select Operator [SEL_1374] + <-Reducer 32 [CONTAINS] vectorized + Reduce Output Operator [RS_1449] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1442] (rows=121728 width=19) + Group By Operator [GBY_1448] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1441] (rows=121728 width=19) + Group By Operator [GBY_1447] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_189] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_187] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_45] (rows=121728 width=19) + Group By Operator [GBY_41] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1158] (rows=7620440 width=11) - Conds:RS_41._col1=RS_1350._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1350] + Merge Join Operator [MERGEJOIN_1163] (rows=7620440 width=11) + Conds:RS_37._col1=RS_1333._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1333] PartitionCols:_col0 - Select Operator [SEL_1342] (rows=458612 width=15) + Select Operator [SEL_1325] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1333] (rows=458612 width=15) + Filter Operator [FIL_1316] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_41] + Please refer to the previous TableScan [TS_0] + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1157] (rows=7676736 width=4) - Conds:RS_1437._col0=RS_1403._col0(Inner),Output:["_col1"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1403] + Merge Join Operator [MERGEJOIN_1162] (rows=7676736 width=4) + Conds:RS_1443._col0=RS_1377._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1377] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 63 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1437] + Please refer to the previous Select Operator [SEL_1374] + <-Map 56 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1443] PartitionCols:_col0 - Select Operator [SEL_1436] (rows=286549727 width=7) + Select Operator [SEL_1442] (rows=286549727 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1435] (rows=286549727 width=7) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_39_d2_d_date_sk_min) AND DynamicValue(RS_39_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_d2_d_date_sk_bloom_filter))) - TableScan [TS_29] (rows=287989836 width=7) + Filter Operator [FIL_1441] (rows=286549727 width=7) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_35_d2_d_date_sk_min) AND DynamicValue(RS_35_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_d2_d_date_sk_bloom_filter))) + TableScan [TS_25] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] - <-Reducer 46 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1434] - Group By Operator [GBY_1433] (rows=1 width=12) + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1440] + Group By Operator [GBY_1439] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1416] - Group By Operator [GBY_1412] (rows=1 width=12) + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1390] + Group By Operator [GBY_1386] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1404] (rows=1957 width=4) + Select Operator [SEL_1378] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 50 [CONTAINS] vectorized - Reduce Output Operator [RS_1457] + Please refer to the previous Select Operator [SEL_1374] + <-Reducer 36 [CONTAINS] vectorized + Reduce Output Operator [RS_1463] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1456] (rows=121728 width=19) + Group By Operator [GBY_1462] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1455] (rows=121728 width=19) + Group By Operator [GBY_1461] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_210] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_208] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_66] (rows=121728 width=19) + Group By Operator [GBY_62] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1160] (rows=3828623 width=11) - Conds:RS_62._col1=RS_1351._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1351] + Merge Join Operator [MERGEJOIN_1165] (rows=3828623 width=11) + Conds:RS_58._col1=RS_1334._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1334] PartitionCols:_col0 - Select Operator [SEL_1343] (rows=458612 width=15) + Select Operator [SEL_1326] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1334] (rows=458612 width=15) + Filter Operator [FIL_1317] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 47 [SIMPLE_EDGE] - SHUFFLE [RS_62] + Please refer to the previous TableScan [TS_0] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_58] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1159] (rows=3856907 width=4) - Conds:RS_1451._col0=RS_1405._col0(Inner),Output:["_col1"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1405] + Merge Join Operator [MERGEJOIN_1164] (rows=3856907 width=4) + Conds:RS_1457._col0=RS_1379._col0(Inner),Output:["_col1"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1379] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 64 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1451] + Please refer to the previous Select Operator [SEL_1374] + <-Map 57 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1457] PartitionCols:_col0 - Select Operator [SEL_1450] (rows=143966864 width=7) + Select Operator [SEL_1456] (rows=143966864 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1449] (rows=143966864 width=7) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_60_d3_d_date_sk_min) AND DynamicValue(RS_60_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_60_d3_d_date_sk_bloom_filter))) - TableScan [TS_50] (rows=144002668 width=7) + Filter Operator [FIL_1455] (rows=143966864 width=7) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_56_d3_d_date_sk_min) AND DynamicValue(RS_56_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_56_d3_d_date_sk_bloom_filter))) + TableScan [TS_46] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] - <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1448] - Group By Operator [GBY_1447] (rows=1 width=12) + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1454] + Group By Operator [GBY_1453] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1417] - Group By Operator [GBY_1413] (rows=1 width=12) + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1391] + Group By Operator [GBY_1387] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1406] (rows=1957 width=4) + Select Operator [SEL_1380] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 59 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1382] - Select Operator [SEL_1381] (rows=1 width=112) + Please refer to the previous Select Operator [SEL_1374] + <-Reducer 62 [SIMPLE_EDGE] + SHUFFLE [RS_233] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1180] (rows=7790806 width=98) + Conds:RS_1412._col0=RS_1345._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 60 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1345] + PartitionCols:_col0 + Select Operator [SEL_1342] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_1341] (rows=50 width=12) + predicate:((d_year = 2000) and (d_moy = 11)) + TableScan [TS_80] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 79 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1412] + PartitionCols:_col0 + Select Operator [SEL_1411] (rows=286549727 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1410] (rows=286549727 width=123) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_229_date_dim_d_date_sk_min) AND DynamicValue(RS_229_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_229_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_222] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 63 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1409] + Group By Operator [GBY_1408] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 60 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1353] + Group By Operator [GBY_1350] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1346] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1342] + <-Reducer 52 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1419] + Select Operator [SEL_1418] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1380] (rows=1 width=120) + Filter Operator [FIL_1417] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1379] (rows=1 width=120) + Group By Operator [GBY_1416] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 58 [CUSTOM_SIMPLE_EDGE] - <-Reducer 57 [CONTAINS] - Reduce Output Operator [RS_1264] - Group By Operator [GBY_1263] (rows=1 width=120) + <-Union 51 [CUSTOM_SIMPLE_EDGE] + <-Reducer 50 [CONTAINS] + Reduce Output Operator [RS_1270] + Group By Operator [GBY_1269] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1262] (rows=26270325 width=44) + Select Operator [SEL_1268] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1260] (rows=14736682 width=0) + Select Operator [SEL_1266] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1259] (rows=14736682 width=0) - Conds:RS_1466._col0=RS_1409._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1409] + Merge Join Operator [MERGEJOIN_1265] (rows=14736682 width=0) + Conds:RS_1472._col0=RS_1383._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1383] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] + Please refer to the previous Select Operator [SEL_1374] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1466] + SHUFFLE [RS_1472] PartitionCols:_col0 - Select Operator [SEL_1464] (rows=550076554 width=114) + Select Operator [SEL_1470] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1463] (rows=550076554 width=114) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_106_date_dim_d_date_sk_min) AND DynamicValue(RS_106_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_106_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_99] (rows=575995635 width=114) + Filter Operator [FIL_1469] (rows=550076554 width=114) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_108_date_dim_d_date_sk_min) AND DynamicValue(RS_108_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_108_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_101] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] - <-Reducer 56 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1462] - Group By Operator [GBY_1461] (rows=1 width=12) + <-Reducer 49 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1468] + Group By Operator [GBY_1467] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1418] - Group By Operator [GBY_1414] (rows=1 width=12) + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1392] + Group By Operator [GBY_1388] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1408] (rows=1957 width=4) + Select Operator [SEL_1382] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] + Please refer to the previous Select Operator [SEL_1374] <-Reducer 69 [CONTAINS] - Reduce Output Operator [RS_1282] - Group By Operator [GBY_1281] (rows=1 width=120) + Reduce Output Operator [RS_1288] + Group By Operator [GBY_1287] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1280] (rows=26270325 width=44) + Select Operator [SEL_1286] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1278] (rows=7676736 width=94) + Select Operator [SEL_1284] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1277] (rows=7676736 width=94) - Conds:RS_1481._col0=RS_1472._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1283] (rows=7676736 width=94) + Conds:RS_1487._col0=RS_1478._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1472] + PARTITION_ONLY_SHUFFLE [RS_1478] PartitionCols:_col0 - Select Operator [SEL_1469] (rows=1957 width=4) + Select Operator [SEL_1475] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1468] (rows=1957 width=8) + Filter Operator [FIL_1474] (rows=1957 width=8) predicate:d_year BETWEEN 1998 AND 2000 - TableScan [TS_112] (rows=73049 width=8) + TableScan [TS_114] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1481] + SHUFFLE [RS_1487] PartitionCols:_col0 - Select Operator [SEL_1479] (rows=286549727 width=119) + Select Operator [SEL_1485] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1478] (rows=286549727 width=119) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_116_date_dim_d_date_sk_min) AND DynamicValue(RS_116_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_116_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_109] (rows=287989836 width=119) + Filter Operator [FIL_1484] (rows=286549727 width=119) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_118_date_dim_d_date_sk_min) AND DynamicValue(RS_118_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_118_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_111] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] <-Reducer 72 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1477] - Group By Operator [GBY_1476] (rows=1 width=12) + BROADCAST [RS_1483] + Group By Operator [GBY_1482] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 71 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1475] - Group By Operator [GBY_1474] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1481] + Group By Operator [GBY_1480] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1471] (rows=1957 width=4) + Select Operator [SEL_1477] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1469] + Please refer to the previous Select Operator [SEL_1475] <-Reducer 75 [CONTAINS] - Reduce Output Operator [RS_1300] - Group By Operator [GBY_1299] (rows=1 width=120) + Reduce Output Operator [RS_1306] + Group By Operator [GBY_1305] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1298] (rows=26270325 width=44) + Select Operator [SEL_1304] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1296] (rows=3856907 width=114) + Select Operator [SEL_1302] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1295] (rows=3856907 width=114) - Conds:RS_1496._col0=RS_1487._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1301] (rows=3856907 width=114) + Conds:RS_1502._col0=RS_1493._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1487] + PARTITION_ONLY_SHUFFLE [RS_1493] PartitionCols:_col0 - Select Operator [SEL_1484] (rows=1957 width=4) + Select Operator [SEL_1490] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1483] (rows=1957 width=8) + Filter Operator [FIL_1489] (rows=1957 width=8) predicate:d_year BETWEEN 1998 AND 2000 - TableScan [TS_123] (rows=73049 width=8) + TableScan [TS_125] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1496] + SHUFFLE [RS_1502] PartitionCols:_col0 - Select Operator [SEL_1494] (rows=143966864 width=119) + Select Operator [SEL_1500] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1493] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_127_date_dim_d_date_sk_min) AND DynamicValue(RS_127_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_127_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_120] (rows=144002668 width=119) + Filter Operator [FIL_1499] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_129_date_dim_d_date_sk_min) AND DynamicValue(RS_129_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_129_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_122] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] <-Reducer 78 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1492] - Group By Operator [GBY_1491] (rows=1 width=12) + BROADCAST [RS_1498] + Group By Operator [GBY_1497] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 77 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1490] - Group By Operator [GBY_1489] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1496] + Group By Operator [GBY_1495] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1486] (rows=1957 width=4) + Select Operator [SEL_1492] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1484] - <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_1216] + Please refer to the previous Select Operator [SEL_1490] + <-Reducer 29 [CONTAINS] + Reduce Output Operator [RS_1234] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1215] (rows=304320 width=230) + Group By Operator [GBY_1233] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1214] (rows=121728 width=220) + Top N Key Operator [TNK_1232] (rows=121728 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1212] (rows=40576 width=218) + Select Operator [SEL_1230] (rows=40576 width=218) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1211] (rows=40576 width=243) + Filter Operator [FIL_1229] (rows=40576 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1210] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1228] (rows=121728 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1394] - Filter Operator [FIL_1393] (rows=121728 width=131) + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1434] + Filter Operator [FIL_1433] (rows=121728 width=131) predicate:_col3 is not null - Group By Operator [GBY_1392] (rows=121728 width=131) + Group By Operator [GBY_1432] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_382] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_388] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_381] (rows=243456 width=131) + Group By Operator [GBY_387] (rows=243456 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_379] (rows=3942084 width=126) + Select Operator [SEL_385] (rows=3942084 width=126) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1189] (rows=3942084 width=126) - Conds:RS_376._col1=RS_1346._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1346] + Merge Join Operator [MERGEJOIN_1195] (rows=3942084 width=126) + Conds:RS_1329._col0=RS_383._col2(Inner),Output:["_col1","_col2","_col3","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1329] PartitionCols:_col0 - Select Operator [SEL_1337] (rows=462000 width=15) + Select Operator [SEL_1320] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Please refer to the previous TableScan [TS_81] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_376] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1188] (rows=3942084 width=118) - Conds:RS_373._col1=RS_374._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_373] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1180] (rows=3942084 width=118) - Conds:RS_1387._col0=RS_1313._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1313] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1308] - <-Map 80 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1387] - PartitionCols:_col0 - Select Operator [SEL_1386] (rows=143966864 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1385] (rows=143966864 width=123) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_371_date_dim_d_date_sk_min) AND DynamicValue(RS_371_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_371_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_287] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1384] - Group By Operator [GBY_1383] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1320] - Group By Operator [GBY_1317] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1314] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1308] - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_374] + Please refer to the previous TableScan [TS_0] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_383] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_1194] (rows=3942084 width=118) + Conds:RS_378._col0=RS_379._col1(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_378] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1187] (rows=724 width=4) - Conds:RS_1353._col1, _col2, _col3=RS_1391._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1353] + Merge Join Operator [MERGEJOIN_1192] (rows=724 width=4) + Conds:RS_1336._col1, _col2, _col3=RS_1426._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1336] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1347] (rows=458612 width=15) + Select Operator [SEL_1330] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1338] (rows=458612 width=15) + Filter Operator [FIL_1321] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1391] + Please refer to the previous TableScan [TS_0] + <-Reducer 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1426] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1390] (rows=1 width=12) + Select Operator [SEL_1425] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1389] (rows=1 width=20) + Filter Operator [FIL_1424] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1388] (rows=121728 width=19) + Group By Operator [GBY_1423] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 36 [SIMPLE_EDGE] - <-Reducer 35 [CONTAINS] vectorized - Reduce Output Operator [RS_1432] + <-Union 23 [SIMPLE_EDGE] + <-Reducer 22 [CONTAINS] vectorized + Reduce Output Operator [RS_1422] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1431] (rows=121728 width=19) + Group By Operator [GBY_1421] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1430] (rows=121728 width=19) + Group By Operator [GBY_1420] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_313] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_25] - <-Reducer 45 [CONTAINS] vectorized - Reduce Output Operator [RS_1446] + Please refer to the previous Group By Operator [GBY_21] + <-Reducer 33 [CONTAINS] vectorized + Reduce Output Operator [RS_1452] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1445] (rows=121728 width=19) + Group By Operator [GBY_1451] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1444] (rows=121728 width=19) + Group By Operator [GBY_1450] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] + <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_333] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_45] - <-Reducer 51 [CONTAINS] vectorized - Reduce Output Operator [RS_1460] + Please refer to the previous Group By Operator [GBY_41] + <-Reducer 37 [CONTAINS] vectorized + Reduce Output Operator [RS_1466] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1459] (rows=121728 width=19) + Group By Operator [GBY_1465] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1458] (rows=121728 width=19) + Group By Operator [GBY_1464] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 48 [SIMPLE_EDGE] + <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_354] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_66] - <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1398] - Select Operator [SEL_1397] (rows=1 width=112) + Please refer to the previous Group By Operator [GBY_62] + <-Reducer 64 [SIMPLE_EDGE] + SHUFFLE [RS_379] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1193] (rows=3942084 width=118) + Conds:RS_1431._col0=RS_1347._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 60 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1347] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1342] + <-Map 80 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1431] + PartitionCols:_col0 + Select Operator [SEL_1430] (rows=143966864 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1429] (rows=143966864 width=123) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_375_date_dim_d_date_sk_min) AND DynamicValue(RS_375_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_375_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_368] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1428] + Group By Operator [GBY_1427] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 60 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1354] + Group By Operator [GBY_1351] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1348] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1342] + <-Reducer 55 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1438] + Select Operator [SEL_1437] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1396] (rows=1 width=120) + Filter Operator [FIL_1436] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1395] (rows=1 width=120) + Group By Operator [GBY_1435] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 61 [CUSTOM_SIMPLE_EDGE] - <-Reducer 60 [CONTAINS] - Reduce Output Operator [RS_1270] - Group By Operator [GBY_1269] (rows=1 width=120) + <-Union 54 [CUSTOM_SIMPLE_EDGE] + <-Reducer 53 [CONTAINS] + Reduce Output Operator [RS_1276] + Group By Operator [GBY_1275] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1268] (rows=26270325 width=44) + Select Operator [SEL_1274] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1266] (rows=14736682 width=0) + Select Operator [SEL_1272] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1265] (rows=14736682 width=0) - Conds:RS_1467._col0=RS_1410._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1410] + Merge Join Operator [MERGEJOIN_1271] (rows=14736682 width=0) + Conds:RS_1473._col0=RS_1384._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1384] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] + Please refer to the previous Select Operator [SEL_1374] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1467] + SHUFFLE [RS_1473] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1470] <-Reducer 70 [CONTAINS] - Reduce Output Operator [RS_1288] - Group By Operator [GBY_1287] (rows=1 width=120) + Reduce Output Operator [RS_1294] + Group By Operator [GBY_1293] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1286] (rows=26270325 width=44) + Select Operator [SEL_1292] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1284] (rows=7676736 width=94) + Select Operator [SEL_1290] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1283] (rows=7676736 width=94) - Conds:RS_1482._col0=RS_1473._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1289] (rows=7676736 width=94) + Conds:RS_1488._col0=RS_1479._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1473] + PARTITION_ONLY_SHUFFLE [RS_1479] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1469] + Please refer to the previous Select Operator [SEL_1475] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1482] + SHUFFLE [RS_1488] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] + Please refer to the previous Select Operator [SEL_1485] <-Reducer 76 [CONTAINS] - Reduce Output Operator [RS_1306] - Group By Operator [GBY_1305] (rows=1 width=120) + Reduce Output Operator [RS_1312] + Group By Operator [GBY_1311] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1304] (rows=26270325 width=44) + Select Operator [SEL_1310] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1302] (rows=3856907 width=114) + Select Operator [SEL_1308] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1301] (rows=3856907 width=114) - Conds:RS_1497._col0=RS_1488._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1307] (rows=3856907 width=114) + Conds:RS_1503._col0=RS_1494._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1488] + PARTITION_ONLY_SHUFFLE [RS_1494] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1484] + Please refer to the previous Select Operator [SEL_1490] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1497] + SHUFFLE [RS_1503] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1494] - <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_1202] + Please refer to the previous Select Operator [SEL_1500] + <-Reducer 4 [CONTAINS] + Reduce Output Operator [RS_1208] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1201] (rows=304320 width=230) + Group By Operator [GBY_1207] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1200] (rows=121728 width=220) + Top N Key Operator [TNK_1206] (rows=121728 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1198] (rows=40576 width=220) + Select Operator [SEL_1204] (rows=40576 width=220) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1197] (rows=40576 width=243) + Filter Operator [FIL_1203] (rows=40576 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1196] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1202] (rows=121728 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1356] - Filter Operator [FIL_1355] (rows=121728 width=131) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1362] + Filter Operator [FIL_1361] (rows=121728 width=131) predicate:_col3 is not null - Group By Operator [GBY_1354] (rows=121728 width=131) + Group By Operator [GBY_1360] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_95] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_97] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_94] (rows=121728 width=131) + Group By Operator [GBY_96] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_92] (rows=15062131 width=11) + Select Operator [SEL_94] (rows=15062131 width=11) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1163] (rows=15062131 width=11) - Conds:RS_89._col1=RS_1339._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1339] + Merge Join Operator [MERGEJOIN_1169] (rows=15062131 width=11) + Conds:RS_1322._col0=RS_92._col2(Inner),Output:["_col1","_col2","_col3","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1322] PartitionCols:_col0 - Select Operator [SEL_1330] (rows=462000 width=15) + Select Operator [SEL_1313] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Please refer to the previous TableScan [TS_81] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1162] (rows=15062131 width=4) - Conds:RS_86._col1=RS_87._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_86] + Please refer to the previous TableScan [TS_0] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_92] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_1168] (rows=15062131 width=4) + Conds:RS_87._col0=RS_88._col1(Inner),Output:["_col2","_col3","_col4"] + <-Reducer 59 [SIMPLE_EDGE] + SHUFFLE [RS_88] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1154] (rows=15062131 width=4) - Conds:RS_1325._col0=RS_1309._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1309] + Merge Join Operator [MERGEJOIN_1167] (rows=15062131 width=4) + Conds:RS_1359._col0=RS_1343._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 60 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1343] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1308] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1325] + Please refer to the previous Select Operator [SEL_1342] + <-Map 58 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1359] PartitionCols:_col0 - Select Operator [SEL_1324] (rows=550076554 width=118) + Select Operator [SEL_1358] (rows=550076554 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1323] (rows=550076554 width=118) + Filter Operator [FIL_1357] (rows=550076554 width=118) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_84_date_dim_d_date_sk_min) AND DynamicValue(RS_84_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_84_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) + TableScan [TS_77] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1322] - Group By Operator [GBY_1321] (rows=1 width=12) + <-Reducer 61 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1356] + Group By Operator [GBY_1355] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1318] - Group By Operator [GBY_1315] (rows=1 width=12) + <-Map 60 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1352] + Group By Operator [GBY_1349] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1310] (rows=50 width=4) + Select Operator [SEL_1344] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1308] - <-Reducer 30 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_1342] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_87] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1161] (rows=724 width=4) - Conds:RS_1348._col1, _col2, _col3=RS_1329._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1348] + Merge Join Operator [MERGEJOIN_1166] (rows=724 width=4) + Conds:RS_1331._col1, _col2, _col3=RS_1340._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1331] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1340] (rows=458612 width=15) + Select Operator [SEL_1323] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1331] (rows=458612 width=15) + Filter Operator [FIL_1314] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1329] + Please refer to the previous TableScan [TS_0] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1340] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1328] (rows=1 width=12) + Select Operator [SEL_1339] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1327] (rows=1 width=20) + Filter Operator [FIL_1338] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1326] (rows=121728 width=19) + Group By Operator [GBY_1337] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 28 [SIMPLE_EDGE] - <-Reducer 27 [CONTAINS] vectorized - Reduce Output Operator [RS_1426] + <-Union 12 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_1400] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1425] (rows=121728 width=19) + Group By Operator [GBY_1399] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1424] (rows=121728 width=19) + Group By Operator [GBY_1398] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_26] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_22] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_25] - <-Reducer 43 [CONTAINS] vectorized - Reduce Output Operator [RS_1440] + Please refer to the previous Group By Operator [GBY_21] + <-Reducer 31 [CONTAINS] vectorized + Reduce Output Operator [RS_1446] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1439] (rows=121728 width=19) + Group By Operator [GBY_1445] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1438] (rows=121728 width=19) + Group By Operator [GBY_1444] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_46] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_45] - <-Reducer 49 [CONTAINS] vectorized - Reduce Output Operator [RS_1454] + Please refer to the previous Group By Operator [GBY_41] + <-Reducer 35 [CONTAINS] vectorized + Reduce Output Operator [RS_1460] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1453] (rows=121728 width=19) + Group By Operator [GBY_1459] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1452] (rows=121728 width=19) + Group By Operator [GBY_1458] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_67] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_66] - <-Reducer 55 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1360] - Select Operator [SEL_1359] (rows=1 width=112) + Please refer to the previous Group By Operator [GBY_62] + <-Reducer 48 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1366] + Select Operator [SEL_1365] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1358] (rows=1 width=120) + Filter Operator [FIL_1364] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1357] (rows=1 width=120) + Group By Operator [GBY_1363] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 54 [CUSTOM_SIMPLE_EDGE] - <-Reducer 53 [CONTAINS] - Reduce Output Operator [RS_1258] - Group By Operator [GBY_1257] (rows=1 width=120) + <-Union 47 [CUSTOM_SIMPLE_EDGE] + <-Reducer 46 [CONTAINS] + Reduce Output Operator [RS_1264] + Group By Operator [GBY_1263] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1256] (rows=26270325 width=44) + Select Operator [SEL_1262] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1254] (rows=14736682 width=0) + Select Operator [SEL_1260] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1253] (rows=14736682 width=0) - Conds:RS_1465._col0=RS_1407._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1407] + Merge Join Operator [MERGEJOIN_1259] (rows=14736682 width=0) + Conds:RS_1471._col0=RS_1381._col0(Inner),Output:["_col1","_col2"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1381] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] + Please refer to the previous Select Operator [SEL_1374] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1465] + SHUFFLE [RS_1471] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1470] <-Reducer 68 [CONTAINS] - Reduce Output Operator [RS_1276] - Group By Operator [GBY_1275] (rows=1 width=120) + Reduce Output Operator [RS_1282] + Group By Operator [GBY_1281] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1274] (rows=26270325 width=44) + Select Operator [SEL_1280] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1272] (rows=7676736 width=94) + Select Operator [SEL_1278] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1271] (rows=7676736 width=94) - Conds:RS_1480._col0=RS_1470._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1277] (rows=7676736 width=94) + Conds:RS_1486._col0=RS_1476._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1470] + PARTITION_ONLY_SHUFFLE [RS_1476] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1469] + Please refer to the previous Select Operator [SEL_1475] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1480] + SHUFFLE [RS_1486] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] + Please refer to the previous Select Operator [SEL_1485] <-Reducer 74 [CONTAINS] - Reduce Output Operator [RS_1294] - Group By Operator [GBY_1293] (rows=1 width=120) + Reduce Output Operator [RS_1300] + Group By Operator [GBY_1299] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1292] (rows=26270325 width=44) + Select Operator [SEL_1298] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1290] (rows=3856907 width=114) + Select Operator [SEL_1296] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1289] (rows=3856907 width=114) - Conds:RS_1495._col0=RS_1485._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1295] (rows=3856907 width=114) + Conds:RS_1501._col0=RS_1491._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1485] + PARTITION_ONLY_SHUFFLE [RS_1491] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1484] + Please refer to the previous Select Operator [SEL_1490] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1495] + SHUFFLE [RS_1501] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1494] + Please refer to the previous Select Operator [SEL_1500] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out index f9b2dc1c42..1683f40c8c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query16.q.out @@ -73,147 +73,149 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 14 <- Reducer 9 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Map 10 <- Reducer 9 (BROADCAST_EDGE) +Map 14 <- Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_158] - Group By Operator [GBY_157] (rows=1 width=232) + Reducer 7 vectorized + File Output Operator [FS_159] + Group By Operator [GBY_158] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_156] - Group By Operator [GBY_155] (rows=1 width=232) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_154] (rows=5150256 width=228) + Group By Operator [GBY_155] (rows=5150256 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_69] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_70] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=5150256 width=228) + Group By Operator [GBY_69] (rows=5150256 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_41] (rows=5150256 width=214) + Select Operator [SEL_42] (rows=5150256 width=214) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_40] (rows=5150256 width=214) + Filter Operator [FIL_41] (rows=5150256 width=214) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_125] (rows=10300512 width=214) - Conds:RS_37._col4=RS_153._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] + Merge Join Operator [MERGEJOIN_126] (rows=10300512 width=214) + Conds:RS_38._col4=RS_154._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_154] PartitionCols:_col1 - Select Operator [SEL_152] (rows=18238808 width=8) + Select Operator [SEL_153] (rows=18238808 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_151] (rows=18238808 width=4) + Group By Operator [GBY_152] (rows=18238808 width=4) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + SHUFFLE [RS_151] PartitionCols:_col0 - Group By Operator [GBY_149] (rows=28798881 width=4) + Group By Operator [GBY_150] (rows=28798881 width=4) Output:["_col0"],keys:cr_order_number - TableScan [TS_25] (rows=28798881 width=4) + TableScan [TS_26] (rows=28798881 width=4) default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_37] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col4 - Select Operator [SEL_36] (rows=5150256 width=200) + Select Operator [SEL_37] (rows=5150256 width=200) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_124] (rows=5150256 width=202) - Conds:RS_33._col4=RS_148._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} - <-Reducer 4 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_33] + Merge Join Operator [MERGEJOIN_125] (rows=5150256 width=202) + Conds:RS_34._col4=RS_149._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_34] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_123] (rows=5150256 width=200) - Conds:RS_18._col2=RS_142._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] - PartitionCols:_col0 - Select Operator [SEL_141] (rows=10 width=102) - Output:["_col0"] - Filter Operator [FIL_140] (rows=10 width=102) - predicate:(cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') - TableScan [TS_9] (rows=60 width=102) - default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_122] (rows=30901534 width=230) - Conds:RS_15._col1=RS_128._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - PartitionCols:_col0 - Select Operator [SEL_127] (rows=784314 width=90) - Output:["_col0"] - Filter Operator [FIL_126] (rows=784314 width=90) - predicate:(ca_state = 'NY') - TableScan [TS_6] (rows=40000000 width=90) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=31519516 width=234) - Conds:RS_136._col0=RS_139._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] - PartitionCols:_col0 - Select Operator [SEL_135] (rows=283695062 width=243) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_134] (rows=283695062 width=243) - predicate:(cs_ship_date_sk is not null and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=243) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_133] - Group By Operator [GBY_132] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_129] (rows=784314 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_127] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - PartitionCols:_col0 - Select Operator [SEL_138] (rows=8116 width=98) - Output:["_col0"] - Filter Operator [FIL_137] (rows=8116 width=98) - predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' - TableScan [TS_3] (rows=73049 width=98) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + Select Operator [SEL_22] (rows=5150256 width=200) + Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_124] (rows=5150256 width=200) + Conds:RS_19._col4=RS_143._col0(Inner),Output:["_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + PartitionCols:_col0 + Select Operator [SEL_142] (rows=10 width=102) + Output:["_col0"] + Filter Operator [FIL_141] (rows=10 width=102) + predicate:(cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') + TableScan [TS_13] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_123] (rows=30901534 width=230) + Conds:RS_129._col0=RS_17._col1(Inner),Output:["_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_127] (rows=784314 width=90) + predicate:(ca_state = 'NY') + TableScan [TS_0] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_122] (rows=31519516 width=234) + Conds:RS_137._col0=RS_140._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_137] + PartitionCols:_col0 + Select Operator [SEL_136] (rows=283695062 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_135] (rows=283695062 width=243) + predicate:(cs_ship_date_sk is not null and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) + TableScan [TS_3] (rows=287989836 width=243) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_130] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_128] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + PartitionCols:_col0 + Select Operator [SEL_139] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_138] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_149] PartitionCols:_col0 - Group By Operator [GBY_147] (rows=286548719 width=7) + Group By Operator [GBY_148] (rows=286548719 width=7) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_146] (rows=286548719 width=7) + Select Operator [SEL_147] (rows=286548719 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_145] (rows=286548719 width=7) - predicate:(cs_warehouse_sk is not null and cs_order_number BETWEEN DynamicValue(RS_33_cs1_cs_order_number_min) AND DynamicValue(RS_33_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_33_cs1_cs_order_number_bloom_filter))) - TableScan [TS_22] (rows=287989836 width=7) + Filter Operator [FIL_146] (rows=286548719 width=7) + predicate:(cs_warehouse_sk is not null and cs_order_number BETWEEN DynamicValue(RS_34_cs1_cs_order_number_min) AND DynamicValue(RS_34_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_34_cs1_cs_order_number_bloom_filter))) + TableScan [TS_23] (rows=287989836 width=7) default@catalog_sales,cs2,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_warehouse_sk","cs_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=5150256 width=8) + Select Operator [SEL_110] (rows=5150256 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_123] + Please refer to the previous Select Operator [SEL_22] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out index 983d239dbc..04d1324b10 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out @@ -81,135 +81,135 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Map 10 <- Reducer 14 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=100 width=1165) + Reducer 5 vectorized + File Output Operator [FS_177] + Limit [LIM_176] (rows=100 width=1165) Number of rows:100 - Select Operator [SEL_174] (rows=11124280 width=1165) + Select Operator [SEL_175] (rows=11124280 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] - Select Operator [SEL_172] (rows=11124280 width=1165) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_174] + Select Operator [SEL_173] (rows=11124280 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Top N Key Operator [TNK_171] (rows=11124280 width=1229) + Top N Key Operator [TNK_172] (rows=11124280 width=1229) keys:_col3, _col2, _col1, _col0,top n:100 - Group By Operator [GBY_170] (rows=11124280 width=1229) + Group By Operator [GBY_171] (rows=11124280 width=1229) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_39] (rows=11124280 width=1229) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col12)","count(_col12)","sum(_col13)","count(_col13)","sum(_col14)","count(_col14)","sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col3)","count(_col3)","sum(_col19)","count(_col19)"],keys:_col21, _col5, _col6, _col7, 0L - Merge Join Operator [MERGEJOIN_145] (rows=2224856 width=816) - Conds:RS_35._col1=RS_169._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col21"] + Group By Operator [GBY_40] (rows=11124280 width=1229) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col14)","count(_col14)","sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col17)","count(_col17)","sum(_col18)","count(_col18)","sum(_col5)","count(_col5)","sum(_col21)","count(_col21)"],keys:_col1, _col7, _col8, _col9, 0L + Merge Join Operator [MERGEJOIN_146] (rows=2224856 width=816) + Conds:RS_36._col3=RS_170._col0(Inner),Output:["_col1","_col5","_col7","_col8","_col9","_col14","_col15","_col16","_col17","_col18","_col21"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] + SHUFFLE [RS_170] PartitionCols:_col0 - Select Operator [SEL_168] (rows=1861800 width=4) + Select Operator [SEL_169] (rows=1861800 width=4) Output:["_col0"] - TableScan [TS_24] (rows=1861800 width=4) + TableScan [TS_31] (rows=1861800 width=4) default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_144] (rows=2193833 width=813) - Conds:RS_32._col11=RS_167._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col21"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_145] (rows=2193833 width=813) + Conds:RS_148._col0=RS_34._col11(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col9","_col14","_col15","_col16","_col17","_col18","_col21"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_166] (rows=462000 width=104) + Select Operator [SEL_147] (rows=462000 width=104) Output:["_col0","_col1"] - TableScan [TS_22] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_32] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_34] PartitionCols:_col11 - Merge Join Operator [MERGEJOIN_143] (rows=2193833 width=717) - Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_30] + Merge Join Operator [MERGEJOIN_144] (rows=2193833 width=717) + Conds:RS_27._col0=RS_28._col1(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_142] (rows=15983636 width=639) - Conds:RS_18._col2=RS_165._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] + Merge Join Operator [MERGEJOIN_143] (rows=15983636 width=639) + Conds:RS_20._col2=RS_168._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_164] (rows=103434 width=116) + Select Operator [SEL_167] (rows=103434 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=103434 width=187) + Filter Operator [FIL_166] (rows=103434 width=187) predicate:((cd_education_status = 'College') and (cd_gender = 'M')) - TableScan [TS_12] (rows=1861800 width=187) + TableScan [TS_14] (rows=1861800 width=187) default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_141] (rows=100578970 width=565) - Conds:RS_162._col0=RS_154._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_154] + Merge Join Operator [MERGEJOIN_142] (rows=100578970 width=565) + Conds:RS_165._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_153] (rows=652 width=4) + Select Operator [SEL_156] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_152] (rows=652 width=8) + Filter Operator [FIL_155] (rows=652 width=8) predicate:(d_year = 2001) - TableScan [TS_9] (rows=73049 width=8) + TableScan [TS_11] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_161] (rows=283692098 width=573) + Select Operator [SEL_164] (rows=283692098 width=573) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_160] (rows=283692098 width=466) - predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=287989836 width=466) + Filter Operator [FIL_163] (rows=283692098 width=466) + predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_18_date_dim_d_date_sk_min) AND DynamicValue(RS_18_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_18_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_8] (rows=287989836 width=466) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_162] + Group By Operator [GBY_161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_157] - Group By Operator [GBY_156] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_155] (rows=652 width=4) + Select Operator [SEL_158] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_153] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] + Please refer to the previous Select Operator [SEL_156] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_27] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_140] (rows=4890586 width=371) - Conds:RS_148._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + Merge Join Operator [MERGEJOIN_141] (rows=4890586 width=371) + Conds:RS_151._col2=RS_154._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] PartitionCols:_col2 - Select Operator [SEL_147] (rows=35631408 width=119) + Select Operator [SEL_150] (rows=35631408 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_146] (rows=35631408 width=19) + Filter Operator [FIL_149] (rows=35631408 width=19) predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_cdemo_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=19) + TableScan [TS_2] (rows=80000000 width=19) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_150] (rows=5490196 width=285) + Select Operator [SEL_153] (rows=5490196 width=285) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_149] (rows=5490196 width=285) + Filter Operator [FIL_152] (rows=5490196 width=285) predicate:(ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') - TableScan [TS_3] (rows=40000000 width=285) + TableScan [TS_5] (rows=40000000 width=285) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query1b.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query1b.q.out index 0fb49d6452..b0d83c886e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query1b.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query1b.q.out @@ -65,50 +65,36 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 11 <- Reducer 7 (BROADCAST_EDGE) - Map 3 <- Map 10 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE), Reducer 8 (BROADCAST_EDGE) - Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 3 (SIMPLE_EDGE) + Map 1 <- Reducer 8 (BROADCAST_EDGE) + Map 6 <- Map 10 (BROADCAST_EDGE), Map 11 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 4 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE), Reducer 9 (BROADCAST_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store - filterExpr: (s_state = 'NM') (type: boolean) - Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer + filterExpr: (c_customer_sk BETWEEN DynamicValue(RS_49_store_returns_sr_customer_sk_min) AND DynamicValue(RS_49_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_49_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (s_state = 'NM') (type: boolean) - Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (c_customer_sk BETWEEN DynamicValue(RS_49_store_returns_sr_customer_sk_min) AND DynamicValue(RS_49_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_49_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + expressions: c_customer_sk (type: int), c_customer_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=35) - minReductionHashAggr: 0.9714286 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized Map 10 Map Operator Tree: @@ -133,25 +119,59 @@ STAGE PLANS: Map 11 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk BETWEEN DynamicValue(RS_47_store_returns_sr_customer_sk_min) AND DynamicValue(RS_47_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_47_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: (d_year = 2000) (type: boolean) + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (c_customer_sk BETWEEN DynamicValue(RS_47_store_returns_sr_customer_sk_min) AND DynamicValue(RS_47_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_47_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (d_year = 2000) (type: boolean) + Statistics: Num rows: 652 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: store + filterExpr: (s_state = 'NM') (type: boolean) + Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (s_state = 'NM') (type: boolean) + Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=35) + minReductionHashAggr: 0.9714286 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Execution mode: vectorized - Map 3 + Map 6 Map Operator Tree: TableScan alias: store_returns @@ -172,7 +192,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 9 + 1 Map 10 Statistics: Num rows: 16855704 Data size: 1805298496 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3) @@ -203,7 +223,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 10 + 1 Map 11 Statistics: Num rows: 17467258 Data size: 1870797840 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3) @@ -220,27 +240,50 @@ STAGE PLANS: Statistics: Num rows: 17467258 Data size: 2081058800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_year = 2000) (type: boolean) - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (d_year = 2000) (type: boolean) - Statistics: Num rows: 652 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col1 (type: string) + null sort order: z + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -253,7 +296,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 4 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -277,7 +320,7 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 0 Map 1 + 0 Map 4 Statistics: Num rows: 2369298 Data size: 272032680 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -287,76 +330,37 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1, _col3, _col4 input vertices: - 1 Reducer 8 + 1 Reducer 9 Statistics: Num rows: 2448274 Data size: 552060636 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col3 > _col4) (type: boolean) Statistics: Num rows: 816091 Data size: 184020140 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 816091 Data size: 184020140 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 816091 Data size: 2431512 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=76429) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + outputColumnNames: _col1 + Statistics: Num rows: 816091 Data size: 184020140 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 816091 Data size: 184020140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 816091 Data size: 2431512 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=76429) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 5 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col7 - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col7 (type: string) - null sort order: z - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col7 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 6 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -369,7 +373,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 8 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out index 601efc035f..be699db3b6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query20.q.out @@ -65,87 +65,87 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_83] - Limit [LIM_82] (rows=100 width=802) + Reducer 5 vectorized + File Output Operator [FS_84] + Limit [LIM_83] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_81] (rows=138600 width=801) + Select Operator [SEL_82] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_80] - Select Operator [SEL_79] (rows=138600 width=801) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + Select Operator [SEL_80] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_78] (rows=138600 width=689) + Top N Key Operator [TNK_79] (rows=138600 width=689) keys:_col0, _col1, _col2, _col3, ((_col5 * 100) / sum_window_0),top n:100 - PTF Operator [PTF_77] (rows=138600 width=689) + PTF Operator [PTF_78] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_76] (rows=138600 width=689) + Select Operator [SEL_77] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_75] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] PartitionCols:_col1 - Group By Operator [GBY_74] (rows=138600 width=689) + Group By Operator [GBY_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=138600 width=689) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_59] (rows=9551005 width=673) - Conds:RS_12._col1=RS_73._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + Group By Operator [GBY_17] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)"],keys:_col5, _col4, _col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_60] (rows=9551005 width=673) + Conds:RS_63._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_72] (rows=138600 width=581) + Select Operator [SEL_62] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_71] (rows=138600 width=581) + Filter Operator [FIL_61] (rows=138600 width=581) predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') - TableScan [TS_6] (rows=462000 width=581) + TableScan [TS_0] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_58] (rows=31836679 width=110) - Conds:RS_70._col0=RS_62._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_62] + Merge Join Operator [MERGEJOIN_59] (rows=31836679 width=110) + Conds:RS_74._col0=RS_66._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col0 - Select Operator [SEL_61] (rows=8116 width=4) + Select Operator [SEL_65] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_60] (rows=8116 width=98) + Filter Operator [FIL_64] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_70] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_69] (rows=286549727 width=119) + Select Operator [SEL_73] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_68] (rows=286549727 width=119) + Filter Operator [FIL_72] (rows=286549727 width=119) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=119) + TableScan [TS_3] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_67] - Group By Operator [GBY_66] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_65] - Group By Operator [GBY_64] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_63] (rows=8116 width=4) + Select Operator [SEL_67] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_61] + Please refer to the previous Select Operator [SEL_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out index a3d6c942fa..ab52cdf942 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out @@ -69,79 +69,79 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Map 9 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_93] - Limit [LIM_92] (rows=100 width=216) + Reducer 4 vectorized + File Output Operator [FS_94] + Limit [LIM_93] (rows=100 width=216) Number of rows:100 - Select Operator [SEL_91] (rows=115991 width=216) + Select Operator [SEL_92] (rows=115991 width=216) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_90] - Top N Key Operator [TNK_89] (rows=115991 width=216) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] + Top N Key Operator [TNK_90] (rows=115991 width=216) keys:_col0, _col1,top n:100 - Filter Operator [FIL_88] (rows=115991 width=216) + Filter Operator [FIL_89] (rows=115991 width=216) predicate:(CASE WHEN ((_col2 > 0L)) THEN ((0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2)))) ELSE (false) END and CASE WHEN ((_col2 > 0L)) THEN (((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D)) ELSE (false) END) - Group By Operator [GBY_87] (rows=463966 width=216) + Group By Operator [GBY_88] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_22] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_23] PartitionCols:_col0, _col1 - Group By Operator [GBY_21] (rows=463966 width=216) + Group By Operator [GBY_22] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Select Operator [SEL_19] (rows=463966 width=208) + Select Operator [SEL_20] (rows=463966 width=208) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_76] (rows=463966 width=208) - Conds:RS_16._col2=RS_86._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_86] + Merge Join Operator [MERGEJOIN_77] (rows=463966 width=208) + Conds:RS_79._col0=RS_18._col2(Inner),Output:["_col1","_col5","_col7","_col8","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] PartitionCols:_col0 - Select Operator [SEL_85] (rows=27 width=104) + Select Operator [SEL_78] (rows=27 width=104) Output:["_col0","_col1"] - TableScan [TS_8] (rows=27 width=104) + TableScan [TS_0] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_75] (rows=463966 width=112) - Conds:RS_13._col1=RS_84._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_84] + Merge Join Operator [MERGEJOIN_76] (rows=463966 width=112) + Conds:RS_13._col1=RS_87._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_83] (rows=51333 width=104) + Select Operator [SEL_86] (rows=51333 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_82] (rows=51333 width=215) + Filter Operator [FIL_85] (rows=51333 width=215) predicate:i_current_price BETWEEN 0.99 AND 1.49 - TableScan [TS_5] (rows=462000 width=215) + TableScan [TS_7] (rows=462000 width=215) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_74] (rows=4175715 width=18) - Conds:RS_78._col0=RS_81._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_78] + Merge Join Operator [MERGEJOIN_75] (rows=4175715 width=18) + Conds:RS_81._col0=RS_84._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] PartitionCols:_col0 - Select Operator [SEL_77] (rows=37584000 width=15) + Select Operator [SEL_80] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=37584000 width=15) + TableScan [TS_2] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_81] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] PartitionCols:_col0 - Select Operator [SEL_80] (rows=8116 width=12) + Select Operator [SEL_83] (rows=8116 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_79] (rows=8116 width=98) + Filter Operator [FIL_82] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' - TableScan [TS_2] (rows=73049 width=98) + TableScan [TS_4] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out index 9ade7a15f5..66668a9024 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query22.q.out @@ -53,62 +53,62 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_66] - Limit [LIM_65] (rows=100 width=397) + Reducer 4 vectorized + File Output Operator [FS_67] + Limit [LIM_66] (rows=100 width=397) Number of rows:100 - Select Operator [SEL_64] (rows=32730675 width=397) + Select Operator [SEL_65] (rows=32730675 width=397) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_63] - Select Operator [SEL_62] (rows=32730675 width=397) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_64] + Select Operator [SEL_63] (rows=32730675 width=397) Output:["_col0","_col1","_col2","_col3","_col4"] - Top N Key Operator [TNK_61] (rows=32730675 width=413) + Top N Key Operator [TNK_62] (rows=32730675 width=413) keys:(UDFToDouble(_col5) / _col6), _col3, _col0, _col1, _col2,top n:100 - Group By Operator [GBY_60] (rows=32730675 width=413) + Group By Operator [GBY_61] (rows=32730675 width=413) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_15] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_14] (rows=32730675 width=413) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col2)","count(_col2)"],keys:_col5, _col6, _col7, _col8, 0L - Merge Join Operator [MERGEJOIN_52] (rows=6546135 width=391) - Conds:RS_10._col1=RS_59._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_59] + Group By Operator [GBY_15] (rows=32730675 width=413) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col7)","count(_col7)"],keys:_col1, _col2, _col3, _col4, 0L + Merge Join Operator [MERGEJOIN_53] (rows=6546135 width=391) + Conds:RS_55._col0=RS_12._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_55] PartitionCols:_col0 - Select Operator [SEL_58] (rows=462000 width=393) + Select Operator [SEL_54] (rows=462000 width=393) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_5] (rows=462000 width=393) + TableScan [TS_0] (rows=462000 width=393) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_10] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_51] (rows=6546135 width=6) - Conds:RS_54._col0=RS_57._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_54] + Merge Join Operator [MERGEJOIN_52] (rows=6546135 width=6) + Conds:RS_57._col0=RS_60._col0(Inner),Output:["_col1","_col2"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_57] PartitionCols:_col0 - Select Operator [SEL_53] (rows=37584000 width=11) + Select Operator [SEL_56] (rows=37584000 width=11) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=37584000 width=11) + TableScan [TS_2] (rows=37584000 width=11) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_57] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_60] PartitionCols:_col0 - Select Operator [SEL_56] (rows=317 width=4) + Select Operator [SEL_59] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_55] (rows=317 width=8) + Filter Operator [FIL_58] (rows=317 width=8) predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_2] (rows=73049 width=8) + TableScan [TS_4] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out index 13d1a99f4b..b79fbe510d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[358][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 17' is a cross product -Warning: Shuffle Join MERGEJOIN[360][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 23' is a cross product +Warning: Shuffle Join MERGEJOIN[360][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[362][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 13' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -119,356 +119,356 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 15 <- Reducer 7 (BROADCAST_EDGE) -Map 19 <- Reducer 26 (BROADCAST_EDGE) +Map 1 <- Reducer 21 (BROADCAST_EDGE) +Map 19 <- Reducer 23 (BROADCAST_EDGE) Map 27 <- Reducer 33 (BROADCAST_EDGE) -Map 35 <- Reducer 14 (BROADCAST_EDGE) -Map 36 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 35 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 37 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) +Map 35 <- Reducer 25 (BROADCAST_EDGE) +Map 37 <- Reducer 26 (BROADCAST_EDGE) +Map 9 <- Reducer 18 (BROADCAST_EDGE) +Reducer 10 <- Map 17 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 36 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 22 (CUSTOM_SIMPLE_EDGE) Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) Reducer 29 <- Map 34 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 30 <- Reducer 29 (SIMPLE_EDGE) Reducer 31 <- Reducer 29 (SIMPLE_EDGE) Reducer 33 <- Map 32 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 36 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 35 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 30 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_440] - Group By Operator [GBY_439] (rows=1 width=112) + Reducer 8 vectorized + File Output Operator [FS_442] + Group By Operator [GBY_441] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Union 5 [CUSTOM_SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_373] - Group By Operator [GBY_372] (rows=1 width=112) + <-Union 7 [CUSTOM_SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] + Reduce Output Operator [RS_375] + Group By Operator [GBY_374] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_370] (rows=3941102 width=112) + Select Operator [SEL_372] (rows=3941102 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_369] (rows=3941102 width=114) - Conds:RS_152._col1=RS_462._col0(Left Semi),Output:["_col3","_col4"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_152] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_361] (rows=3941102 width=118) - Conds:RS_147._col2=RS_456._col0(Inner),Output:["_col1","_col3","_col4"] - <-Reducer 10 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_147] + Merge Join Operator [MERGEJOIN_371] (rows=3941102 width=114) + Conds:RS_154._col2=RS_464._col0(Left Semi),Output:["_col4","_col5"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_154] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_363] (rows=3941102 width=118) + Conds:RS_458._col0=RS_150._col2(Inner),Output:["_col2","_col4","_col5"] + <-Reducer 24 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_150] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_354] (rows=3941102 width=122) - Conds:RS_445._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_378] + Merge Join Operator [MERGEJOIN_357] (rows=3941102 width=122) + Conds:RS_447._col0=RS_380._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_380] PartitionCols:_col0 - Select Operator [SEL_375] (rows=50 width=4) + Select Operator [SEL_377] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_374] (rows=50 width=12) + Filter Operator [FIL_376] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 1)) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_42] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_445] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_447] PartitionCols:_col0 - Select Operator [SEL_444] (rows=143930993 width=127) + Select Operator [SEL_446] (rows=143930993 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_443] (rows=143930993 width=127) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_145_date_dim_d_date_sk_min) AND DynamicValue(RS_145_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_145_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_78] (rows=144002668 width=127) + Filter Operator [FIL_445] (rows=143930993 width=127) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_125_date_dim_d_date_sk_min) AND DynamicValue(RS_125_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_125_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_118] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_442] - Group By Operator [GBY_441] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_444] + Group By Operator [GBY_443] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_383] - Group By Operator [GBY_381] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_385] + Group By Operator [GBY_383] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_379] (rows=50 width=4) + Select Operator [SEL_381] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_375] - <-Reducer 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_456] + Please refer to the previous Select Operator [SEL_377] + <-Reducer 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_458] PartitionCols:_col0 - Group By Operator [GBY_455] (rows=235937 width=3) + Group By Operator [GBY_457] (rows=235937 width=3) Output:["_col0"],keys:KEY._col0 - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_120] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_119] (rows=235937 width=3) + Group By Operator [GBY_114] (rows=235937 width=3) Output:["_col0"],keys:_col0 - Select Operator [SEL_118] (rows=471875 width=227) + Select Operator [SEL_113] (rows=471875 width=227) Output:["_col0"] - Filter Operator [FIL_117] (rows=471875 width=227) + Filter Operator [FIL_112] (rows=471875 width=227) predicate:(_col1 > _col2) - Merge Join Operator [MERGEJOIN_360] (rows=1415626 width=227) + Merge Join Operator [MERGEJOIN_362] (rows=1415626 width=227) Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_417] - Select Operator [SEL_415] (rows=1 width=112) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_419] + Select Operator [SEL_417] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_414] (rows=1 width=112) + Filter Operator [FIL_416] (rows=1 width=112) predicate:_col0 is not null - Group By Operator [GBY_413] (rows=1 width=112) + Group By Operator [GBY_415] (rows=1 width=112) Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_412] - Group By Operator [GBY_411] (rows=1 width=112) + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_414] + Group By Operator [GBY_413] (rows=1 width=112) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_410] (rows=50562 width=112) + Select Operator [SEL_412] (rows=50562 width=112) Output:["_col1"] - Group By Operator [GBY_409] (rows=50562 width=112) + Group By Operator [GBY_411] (rows=50562 width=112) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_26] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col0 - Group By Operator [GBY_25] (rows=455058 width=112) + Group By Operator [GBY_19] (rows=455058 width=112) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col1 - Merge Join Operator [MERGEJOIN_351] (rows=18762463 width=112) - Conds:RS_408._col0=RS_400._col0(Inner),Output:["_col1","_col2"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_400] + Merge Join Operator [MERGEJOIN_352] (rows=18762463 width=112) + Conds:RS_410._col0=RS_402._col0(Inner),Output:["_col1","_col2"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_402] PartitionCols:_col0 - Select Operator [SEL_399] (rows=2609 width=4) + Select Operator [SEL_401] (rows=2609 width=4) Output:["_col0"] - Filter Operator [FIL_398] (rows=2609 width=8) + Filter Operator [FIL_400] (rows=2609 width=8) predicate:(d_year) IN (1999, 2000, 2001, 2002) - TableScan [TS_18] (rows=73049 width=8) + TableScan [TS_12] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_408] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_410] PartitionCols:_col0 - Select Operator [SEL_407] (rows=525327388 width=119) + Select Operator [SEL_409] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_406] (rows=525327388 width=118) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_15] (rows=575995635 width=118) + Filter Operator [FIL_408] (rows=525327388 width=118) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_405] - Group By Operator [GBY_404] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_407] + Group By Operator [GBY_406] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_403] - Group By Operator [GBY_402] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_405] + Group By Operator [GBY_404] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_401] (rows=2609 width=4) + Select Operator [SEL_403] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_399] - <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_454] - Filter Operator [FIL_453] (rows=1415626 width=115) + Please refer to the previous Select Operator [SEL_401] + <-Reducer 36 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_456] + Filter Operator [FIL_455] (rows=1415626 width=115) predicate:_col1 is not null - Group By Operator [GBY_452] (rows=1415626 width=115) + Group By Operator [GBY_454] (rows=1415626 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_451] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_453] PartitionCols:_col0 - Group By Operator [GBY_450] (rows=550080312 width=115) + Group By Operator [GBY_452] (rows=550080312 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_449] (rows=550080312 width=114) + Select Operator [SEL_451] (rows=550080312 width=114) Output:["_col0","_col1"] - Filter Operator [FIL_448] (rows=550080312 width=114) - predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_147_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_147_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_147_web_sales_ws_bill_customer_sk_bloom_filter))) - TableScan [TS_84] (rows=575995635 width=114) + Filter Operator [FIL_450] (rows=550080312 width=114) + predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_150_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_150_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_150_web_sales_ws_bill_customer_sk_bloom_filter))) + TableScan [TS_79] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_447] - Group By Operator [GBY_446] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_449] + Group By Operator [GBY_448] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_319] - Group By Operator [GBY_318] (rows=1 width=12) + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_317] (rows=3941102 width=7) + Select Operator [SEL_289] (rows=3941102 width=7) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_354] + Please refer to the previous Merge Join Operator [MERGEJOIN_357] <-Reducer 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_462] + SHUFFLE [RS_464] PartitionCols:_col0 - Group By Operator [GBY_461] (rows=2235 width=4) + Group By Operator [GBY_463] (rows=2235 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_460] (rows=1943705 width=4) + Select Operator [SEL_462] (rows=1943705 width=4) Output:["_col0"] - Filter Operator [FIL_459] (rows=1943705 width=106) + Filter Operator [FIL_461] (rows=1943705 width=106) predicate:(_col2 > 4L) - Select Operator [SEL_458] (rows=5831115 width=106) + Select Operator [SEL_460] (rows=5831115 width=106) Output:["_col0","_col2"] - Group By Operator [GBY_457] (rows=5831115 width=106) + Group By Operator [GBY_459] (rows=5831115 width=106) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_139] + SHUFFLE [RS_144] PartitionCols:_col0, _col1 - Group By Operator [GBY_60] (rows=19646398 width=106) + Group By Operator [GBY_64] (rows=19646398 width=106) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_353] (rows=19646398 width=98) - Conds:RS_56._col1=RS_432._col0(Inner),Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_355] (rows=19646398 width=98) + Conds:RS_60._col1=RS_434._col0(Inner),Output:["_col3","_col4"] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_432] + SHUFFLE [RS_434] PartitionCols:_col0 - Select Operator [SEL_431] (rows=462000 width=188) + Select Operator [SEL_433] (rows=462000 width=188) Output:["_col0"] - TableScan [TS_51] (rows=462000 width=4) + TableScan [TS_55] (rows=462000 width=4) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_56] + SHUFFLE [RS_60] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_352] (rows=19646398 width=98) - Conds:RS_430._col0=RS_422._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_354] (rows=19646398 width=98) + Conds:RS_432._col0=RS_424._col0(Inner),Output:["_col1","_col3"] <-Map 32 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_422] + PARTITION_ONLY_SHUFFLE [RS_424] PartitionCols:_col0 - Select Operator [SEL_421] (rows=2609 width=98) + Select Operator [SEL_423] (rows=2609 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_420] (rows=2609 width=102) + Filter Operator [FIL_422] (rows=2609 width=102) predicate:(d_year) IN (1999, 2000, 2001, 2002) - TableScan [TS_48] (rows=73049 width=102) + TableScan [TS_52] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_year"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_430] + SHUFFLE [RS_432] PartitionCols:_col0 - Select Operator [SEL_429] (rows=550076554 width=7) + Select Operator [SEL_431] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_428] (rows=550076554 width=7) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_54_date_dim_d_date_sk_min) AND DynamicValue(RS_54_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_54_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_45] (rows=575995635 width=7) + Filter Operator [FIL_430] (rows=550076554 width=7) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_49] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_427] - Group By Operator [GBY_426] (rows=1 width=12) + BROADCAST [RS_429] + Group By Operator [GBY_428] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_425] - Group By Operator [GBY_424] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_427] + Group By Operator [GBY_426] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_423] (rows=2609 width=4) + Select Operator [SEL_425] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_421] - <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_368] - Group By Operator [GBY_367] (rows=1 width=112) + Please refer to the previous Select Operator [SEL_423] + <-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_370] + Group By Operator [GBY_369] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_365] (rows=7751875 width=112) + Select Operator [SEL_367] (rows=7751875 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_364] (rows=7751875 width=94) - Conds:RS_74._col2=RS_438._col0(Left Semi),Output:["_col3","_col4"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_74] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_359] (rows=7751875 width=98) - Conds:RS_69._col1=RS_419._col0(Inner),Output:["_col2","_col3","_col4"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_69] + Merge Join Operator [MERGEJOIN_366] (rows=7751875 width=94) + Conds:RS_75._col3=RS_440._col0(Left Semi),Output:["_col4","_col5"] + <-Reducer 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_440] + PartitionCols:_col0 + Group By Operator [GBY_439] (rows=2235 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_438] (rows=1943705 width=4) + Output:["_col0"] + Filter Operator [FIL_437] (rows=1943705 width=106) + predicate:(_col2 > 4L) + Select Operator [SEL_436] (rows=5831115 width=106) + Output:["_col0","_col2"] + Group By Operator [GBY_435] (rows=5831115 width=106) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0, _col1 + Please refer to the previous Group By Operator [GBY_64] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_361] (rows=7751875 width=98) + Conds:RS_421._col0=RS_71._col1(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_71] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_350] (rows=7751875 width=101) - Conds:RS_388._col0=RS_376._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_376] + Merge Join Operator [MERGEJOIN_353] (rows=7751875 width=101) + Conds:RS_390._col0=RS_378._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_378] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_375] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_388] + Please refer to the previous Select Operator [SEL_377] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_390] PartitionCols:_col0 - Select Operator [SEL_387] (rows=285117831 width=127) + Select Operator [SEL_389] (rows=285117831 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_386] (rows=285117831 width=127) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=127) + Filter Operator [FIL_388] (rows=285117831 width=127) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_46_date_dim_d_date_sk_min) AND DynamicValue(RS_46_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_46_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_39] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_385] - Group By Operator [GBY_384] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_387] + Group By Operator [GBY_386] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_382] - Group By Operator [GBY_380] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_384] + Group By Operator [GBY_382] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_377] (rows=50 width=4) + Select Operator [SEL_379] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_375] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_419] + Please refer to the previous Select Operator [SEL_377] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_421] PartitionCols:_col0 - Group By Operator [GBY_418] (rows=235937 width=3) + Group By Operator [GBY_420] (rows=235937 width=3) Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_42] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_41] (rows=235937 width=3) + Group By Operator [GBY_35] (rows=235937 width=3) Output:["_col0"],keys:_col0 - Select Operator [SEL_40] (rows=471875 width=227) + Select Operator [SEL_34] (rows=471875 width=227) Output:["_col0"] - Filter Operator [FIL_39] (rows=471875 width=227) + Filter Operator [FIL_33] (rows=471875 width=227) predicate:(_col1 > _col2) - Merge Join Operator [MERGEJOIN_358] (rows=1415626 width=227) + Merge Join Operator [MERGEJOIN_360] (rows=1415626 width=227) Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_416] - Please refer to the previous Select Operator [SEL_415] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_397] - Filter Operator [FIL_396] (rows=1415626 width=115) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_418] + Please refer to the previous Select Operator [SEL_417] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_399] + Filter Operator [FIL_398] (rows=1415626 width=115) predicate:_col1 is not null - Group By Operator [GBY_395] (rows=1415626 width=115) + Group By Operator [GBY_397] (rows=1415626 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_394] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_396] PartitionCols:_col0 - Group By Operator [GBY_393] (rows=550080312 width=115) + Group By Operator [GBY_395] (rows=550080312 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_392] (rows=550080312 width=114) + Select Operator [SEL_394] (rows=550080312 width=114) Output:["_col0","_col1"] - Filter Operator [FIL_391] (rows=550080312 width=114) - predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_69_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_69_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_69_catalog_sales_cs_bill_customer_sk_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=114) + Filter Operator [FIL_393] (rows=550080312 width=114) + predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_71_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_71_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_71_catalog_sales_cs_bill_customer_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_390] - Group By Operator [GBY_389] (rows=1 width=12) + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_392] + Group By Operator [GBY_391] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + <-Reducer 20 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_228] + Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_254] (rows=7751875 width=6) + Select Operator [SEL_226] (rows=7751875 width=6) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_350] - <-Reducer 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_438] - PartitionCols:_col0 - Group By Operator [GBY_437] (rows=2235 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_436] (rows=1943705 width=4) - Output:["_col0"] - Filter Operator [FIL_435] (rows=1943705 width=106) - predicate:(_col2 > 4L) - Select Operator [SEL_434] (rows=5831115 width=106) - Output:["_col0","_col2"] - Group By Operator [GBY_433] (rows=5831115 width=106) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0, _col1 - Please refer to the previous Group By Operator [GBY_60] + Please refer to the previous Merge Join Operator [MERGEJOIN_353] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out index 87895bd1f3..0ae5053611 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[303][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -117,205 +117,205 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 20 (BROADCAST_EDGE) -Map 21 <- Reducer 16 (BROADCAST_EDGE) +Map 21 <- Reducer 15 (BROADCAST_EDGE) Reducer 10 <- Map 19 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 19 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 21 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 15 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 9 <- Reducer 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 6 - File Output Operator [FS_96] - Select Operator [SEL_95] (rows=5130 width=380) + File Output Operator [FS_98] + Select Operator [SEL_97] (rows=5130 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_94] (rows=5130 width=492) + Filter Operator [FIL_96] (rows=5130 width=492) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_301] (rows=15392 width=492) + Merge Join Operator [MERGEJOIN_303] (rows=15392 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] - Select Operator [SEL_344] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_347] + Select Operator [SEL_346] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_343] (rows=1 width=120) + Filter Operator [FIL_345] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(21,6)) is not null - Group By Operator [GBY_342] (rows=1 width=120) + Group By Operator [GBY_344] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_341] - Group By Operator [GBY_340] (rows=1 width=120) + PARTITION_ONLY_SHUFFLE [RS_343] + Group By Operator [GBY_342] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_339] (rows=589731268 width=932) + Select Operator [SEL_341] (rows=589731268 width=932) Output:["_col10"] - Group By Operator [GBY_338] (rows=589731268 width=932) + Group By Operator [GBY_340] (rows=589731268 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_81] + SHUFFLE [RS_83] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_80] (rows=589731268 width=932) + Group By Operator [GBY_82] (rows=589731268 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col9, _col10, _col13, _col17, _col18, _col21, _col22, _col23, _col24, _col25 - Merge Join Operator [MERGEJOIN_300] (rows=589731268 width=928) - Conds:RS_76._col0=RS_305._col0(Inner),Output:["_col4","_col9","_col10","_col13","_col17","_col18","_col21","_col22","_col23","_col24","_col25"] + Merge Join Operator [MERGEJOIN_302] (rows=589731268 width=928) + Conds:RS_78._col0=RS_307._col0(Inner),Output:["_col4","_col9","_col10","_col13","_col17","_col18","_col21","_col22","_col23","_col24","_col25"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_305] + SHUFFLE [RS_307] PartitionCols:_col0 - Select Operator [SEL_303] (rows=462000 width=384) + Select Operator [SEL_305] (rows=462000 width=384) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_22] (rows=462000 width=384) + TableScan [TS_23] (rows=462000 width=384) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_76] + SHUFFLE [RS_78] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_299] (rows=589731268 width=551) - Conds:RS_73._col2, _col1=RS_74._col9, _col0(Inner),Output:["_col0","_col4","_col9","_col10","_col13","_col17","_col18"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_74] + Merge Join Operator [MERGEJOIN_301] (rows=589731268 width=551) + Conds:RS_75._col2, _col1=RS_76._col9, _col0(Inner),Output:["_col0","_col4","_col9","_col10","_col13","_col17","_col18"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_76] PartitionCols:_col9, _col0 - Select Operator [SEL_21] (rows=7276996 width=724) + Select Operator [SEL_22] (rows=7276996 width=724) Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col11"] - Filter Operator [FIL_20] (rows=7276996 width=724) - predicate:(_col12 <> _col3) - Merge Join Operator [MERGEJOIN_293] (rows=7276996 width=724) - Conds:RS_17._col0=RS_326._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col10","_col11","_col12"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + Filter Operator [FIL_21] (rows=7276996 width=724) + predicate:(_col4 <> _col8) + Merge Join Operator [MERGEJOIN_295] (rows=7276996 width=724) + Conds:RS_322._col1=RS_19._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col6","_col8","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] PartitionCols:_col1 - Select Operator [SEL_325] (rows=80000000 width=280) + Select Operator [SEL_321] (rows=80000000 width=280) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_324] (rows=80000000 width=280) + Filter Operator [FIL_320] (rows=80000000 width=280) predicate:c_current_addr_sk is not null - TableScan [TS_11] (rows=80000000 width=280) + TableScan [TS_5] (rows=80000000 width=280) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_292] (rows=611379 width=452) - Conds:RS_320._col2=RS_323._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Merge Join Operator [MERGEJOIN_294] (rows=611379 width=452) + Conds:RS_325._col2=RS_328._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] PartitionCols:_col2 - Select Operator [SEL_319] (rows=40000000 width=363) + Select Operator [SEL_324] (rows=40000000 width=363) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_318] (rows=40000000 width=276) + Filter Operator [FIL_323] (rows=40000000 width=276) predicate:ca_zip is not null - TableScan [TS_5] (rows=40000000 width=276) + TableScan [TS_8] (rows=40000000 width=276) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] PartitionCols:_col3 - Select Operator [SEL_322] (rows=155 width=267) + Select Operator [SEL_327] (rows=155 width=267) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_321] (rows=155 width=271) + Filter Operator [FIL_326] (rows=155 width=271) predicate:((s_market_id = 7) and s_zip is not null) - TableScan [TS_8] (rows=1704 width=270) + TableScan [TS_11] (rows=1704 width=270) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_73] + SHUFFLE [RS_75] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_296] (rows=537799798 width=118) - Conds:RS_337._col0, _col3=RS_317._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_298] (rows=537799798 width=118) + Conds:RS_339._col0, _col3=RS_319._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + SHUFFLE [RS_319] PartitionCols:_col0, _col1 - Select Operator [SEL_315] (rows=57591150 width=8) + Select Operator [SEL_317] (rows=57591150 width=8) Output:["_col0","_col1"] TableScan [TS_3] (rows=57591150 width=8) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] + SHUFFLE [RS_339] PartitionCols:_col0, _col3 - Select Operator [SEL_336] (rows=525333486 width=122) + Select Operator [SEL_338] (rows=525333486 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_335] (rows=525333486 width=122) - predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_74_customer_c_customer_sk_min) AND DynamicValue(RS_74_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_74_customer_c_customer_sk_bloom_filter))) - TableScan [TS_46] (rows=575995635 width=122) + Filter Operator [FIL_337] (rows=525333486 width=122) + predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_76_customer_c_customer_sk_min) AND DynamicValue(RS_76_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_76_customer_c_customer_sk_bloom_filter))) + TableScan [TS_47] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_334] - Group By Operator [GBY_333] (rows=1 width=12) + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_336] + Group By Operator [GBY_335] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_235] - Group By Operator [GBY_234] (rows=1 width=12) + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_237] + Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] - Select Operator [SEL_233] (rows=7276996 width=4) + Select Operator [SEL_235] (rows=7276996 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_21] + Please refer to the previous Select Operator [SEL_22] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_332] - Filter Operator [FIL_331] (rows=15392 width=380) + PARTITION_ONLY_SHUFFLE [RS_334] + Filter Operator [FIL_333] (rows=15392 width=380) predicate:_col3 is not null - Select Operator [SEL_330] (rows=15392 width=380) + Select Operator [SEL_332] (rows=15392 width=380) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_329] (rows=15392 width=380) + Group By Operator [GBY_331] (rows=15392 width=380) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col0, _col1, _col3 - Select Operator [SEL_328] (rows=86004082 width=843) + Select Operator [SEL_330] (rows=86004082 width=843) Output:["_col0","_col1","_col3","_col9"] - Group By Operator [GBY_327] (rows=86004082 width=843) + Group By Operator [GBY_329] (rows=86004082 width=843) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_36] + SHUFFLE [RS_37] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_35] (rows=86004082 width=843) + Group By Operator [GBY_36] (rows=86004082 width=843) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col9, _col10, _col17, _col13, _col18, _col21, _col22, _col23, _col24 - Merge Join Operator [MERGEJOIN_295] (rows=86004082 width=813) - Conds:RS_31._col0=RS_306._col0(Inner),Output:["_col4","_col9","_col10","_col13","_col17","_col18","_col21","_col22","_col23","_col24"] + Merge Join Operator [MERGEJOIN_297] (rows=86004082 width=813) + Conds:RS_32._col0=RS_308._col0(Inner),Output:["_col4","_col9","_col10","_col13","_col17","_col18","_col21","_col22","_col23","_col24"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] + SHUFFLE [RS_308] PartitionCols:_col0 - Select Operator [SEL_304] (rows=7000 width=295) + Select Operator [SEL_306] (rows=7000 width=295) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_302] (rows=7000 width=384) + Filter Operator [FIL_304] (rows=7000 width=384) predicate:(i_color = 'orchid') - Please refer to the previous TableScan [TS_22] + Please refer to the previous TableScan [TS_23] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_31] + SHUFFLE [RS_32] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_294] (rows=589731268 width=551) - Conds:RS_28._col2, _col1=RS_29._col9, _col0(Inner),Output:["_col0","_col4","_col9","_col10","_col13","_col17","_col18"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_29] + Merge Join Operator [MERGEJOIN_296] (rows=589731268 width=551) + Conds:RS_29._col2, _col1=RS_30._col9, _col0(Inner),Output:["_col0","_col4","_col9","_col10","_col13","_col17","_col18"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col9, _col0 - Please refer to the previous Select Operator [SEL_21] + Please refer to the previous Select Operator [SEL_22] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_28] + SHUFFLE [RS_29] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_291] (rows=537799798 width=118) - Conds:RS_314._col0, _col3=RS_316._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_293] (rows=537799798 width=118) + Conds:RS_316._col0, _col3=RS_318._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + SHUFFLE [RS_318] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_315] + Please refer to the previous Select Operator [SEL_317] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] + SHUFFLE [RS_316] PartitionCols:_col0, _col3 - Select Operator [SEL_313] (rows=525333486 width=122) + Select Operator [SEL_315] (rows=525333486 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_312] (rows=525333486 width=122) - predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_32_item_i_item_sk_min) AND DynamicValue(RS_32_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_32_item_i_item_sk_bloom_filter))) + Filter Operator [FIL_314] (rows=525333486 width=122) + predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_33_item_i_item_sk_min) AND DynamicValue(RS_33_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_33_item_i_item_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_311] - Group By Operator [GBY_310] (rows=1 width=12) + BROADCAST [RS_313] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - Group By Operator [GBY_308] (rows=1 width=12) + SHUFFLE [RS_311] + Group By Operator [GBY_310] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_307] (rows=7000 width=4) + Select Operator [SEL_309] (rows=7000 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_304] + Please refer to the previous Select Operator [SEL_306] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out index 6a59e838e0..79a9c0233b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out @@ -53,107 +53,107 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_128] - Limit [LIM_127] (rows=100 width=444) + Reducer 4 vectorized + File Output Operator [FS_129] + Limit [LIM_128] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_126] (rows=310774 width=444) + Select Operator [SEL_127] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] - Select Operator [SEL_124] (rows=310774 width=444) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + Select Operator [SEL_125] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_123] (rows=310774 width=476) + Group By Operator [GBY_124] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_28] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Group By Operator [GBY_27] (rows=462000 width=476) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_57] (rows=809521 width=100) - keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_103] (rows=809521 width=100) - Conds:RS_23._col2=RS_122._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] + Group By Operator [GBY_28] (rows=462000 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col9)","count(_col9)","sum(_col8)","count(_col8)"],keys:_col1 + Top N Key Operator [TNK_58] (rows=809521 width=100) + keys:_col1,top n:100 + Merge Join Operator [MERGEJOIN_104] (rows=809521 width=100) + Conds:RS_106._col0=RS_25._col2(Inner),Output:["_col1","_col6","_col7","_col8","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_121] (rows=462000 width=104) + Select Operator [SEL_105] (rows=462000 width=104) Output:["_col0","_col1"] - TableScan [TS_12] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_102] (rows=809521 width=4) - Conds:RS_20._col3=RS_120._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + Merge Join Operator [MERGEJOIN_103] (rows=809521 width=4) + Conds:RS_20._col3=RS_123._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_119] (rows=2300 width=4) + Select Operator [SEL_122] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_118] (rows=2300 width=174) + Filter Operator [FIL_121] (rows=2300 width=174) predicate:((p_channel_email = 'N') or (p_channel_event = 'N')) - TableScan [TS_9] (rows=2300 width=174) + TableScan [TS_11] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_101] (rows=809521 width=4) - Conds:RS_17._col0=RS_117._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_102] (rows=809521 width=4) + Conds:RS_17._col0=RS_120._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_116] (rows=652 width=4) + Select Operator [SEL_119] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_115] (rows=652 width=8) + Filter Operator [FIL_118] (rows=652 width=8) predicate:(d_year = 1998) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_8] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_100] (rows=2283326 width=135) - Conds:RS_114._col1=RS_106._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] + Merge Join Operator [MERGEJOIN_101] (rows=2283326 width=135) + Conds:RS_117._col1=RS_109._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] PartitionCols:_col0 - Select Operator [SEL_105] (rows=14776 width=4) + Select Operator [SEL_108] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_104] (rows=14776 width=268) + Filter Operator [FIL_107] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F')) - TableScan [TS_3] (rows=1861800 width=268) + TableScan [TS_5] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col1 - Select Operator [SEL_113] (rows=283691050 width=354) + Select Operator [SEL_116] (rows=283691050 width=354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_112] (rows=283691050 width=354) + Filter Operator [FIL_115] (rows=283691050 width=354) predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_cdemo_sk BETWEEN DynamicValue(RS_15_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_15_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_15_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=354) + TableScan [TS_2] (rows=287989836 width=354) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_109] - Group By Operator [GBY_108] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_107] (rows=14776 width=4) + Select Operator [SEL_110] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_105] + Please refer to the previous Select Operator [SEL_108] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out index 4f9060b968..64bc2b99a3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out @@ -57,109 +57,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_129] - Limit [LIM_128] (rows=100 width=538) + Reducer 4 vectorized + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=538) Number of rows:100 - Select Operator [SEL_127] (rows=4281825 width=538) + Select Operator [SEL_128] (rows=4281825 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] - Select Operator [SEL_125] (rows=4281825 width=538) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] + Select Operator [SEL_126] (rows=4281825 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_124] (rows=4281825 width=570) + Group By Operator [GBY_125] (rows=4281825 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_28] (rows=4281825 width=570) + Group By Operator [GBY_29] (rows=4281825 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Select Operator [SEL_26] (rows=1427275 width=186) + Select Operator [SEL_27] (rows=1427275 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_58] (rows=1427275 width=186) - keys:_col13, _col11,top n:100 - Merge Join Operator [MERGEJOIN_104] (rows=1427275 width=186) - Conds:RS_23._col1=RS_123._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + Top N Key Operator [TNK_59] (rows=1427275 width=186) + keys:_col1, _col13,top n:100 + Merge Join Operator [MERGEJOIN_105] (rows=1427275 width=186) + Conds:RS_107._col0=RS_25._col1(Inner),Output:["_col1","_col6","_col7","_col8","_col9","_col13"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_122] (rows=462000 width=104) + Select Operator [SEL_106] (rows=462000 width=104) Output:["_col0","_col1"] - TableScan [TS_12] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_103] (rows=1427275 width=90) - Conds:RS_20._col3=RS_121._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] + Merge Join Operator [MERGEJOIN_104] (rows=1427275 width=90) + Conds:RS_20._col3=RS_124._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Select Operator [SEL_120] (rows=209 width=90) + Select Operator [SEL_123] (rows=209 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_119] (rows=209 width=90) + Filter Operator [FIL_122] (rows=209 width=90) predicate:(s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') - TableScan [TS_9] (rows=1704 width=90) + TableScan [TS_11] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_102] (rows=1441779 width=4) - Conds:RS_17._col0=RS_118._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + Merge Join Operator [MERGEJOIN_103] (rows=1441779 width=4) + Conds:RS_17._col0=RS_121._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] PartitionCols:_col0 - Select Operator [SEL_117] (rows=652 width=4) + Select Operator [SEL_120] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_116] (rows=652 width=8) + Filter Operator [FIL_119] (rows=652 width=8) predicate:(d_year = 2001) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_8] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_101] (rows=4037920 width=4) - Conds:RS_115._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_102] (rows=4037920 width=4) + Conds:RS_118._col2=RS_110._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_106] (rows=14776 width=4) + Select Operator [SEL_109] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_105] (rows=14776 width=268) + Filter Operator [FIL_108] (rows=14776 width=268) predicate:((cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and (cd_gender = 'M')) - TableScan [TS_3] (rows=1861800 width=268) + TableScan [TS_5] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] PartitionCols:_col2 - Select Operator [SEL_114] (rows=501690006 width=340) + Select Operator [SEL_117] (rows=501690006 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_113] (rows=501690006 width=340) + Filter Operator [FIL_116] (rows=501690006 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_15_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_15_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_15_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=340) + TableScan [TS_2] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=14776 width=4) + Select Operator [SEL_111] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] + Please refer to the previous Select Operator [SEL_109] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out index ee51918979..8984a85178 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query30.q.out @@ -87,141 +87,141 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_216] - Limit [LIM_215] (rows=100 width=942) + File Output Operator [FS_218] + Limit [LIM_217] (rows=100 width=942) Number of rows:100 - Select Operator [SEL_214] (rows=704993 width=930) + Select Operator [SEL_216] (rows=704993 width=930) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_66] - Select Operator [SEL_65] (rows=704993 width=930) + SHUFFLE [RS_68] + Select Operator [SEL_67] (rows=704993 width=930) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Top N Key Operator [TNK_104] (rows=704993 width=942) + Top N Key Operator [TNK_106] (rows=704993 width=942) keys:_col1, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col17,top n:100 - Merge Join Operator [MERGEJOIN_181] (rows=704993 width=942) - Conds:RS_62._col0=RS_63._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17"] + Merge Join Operator [MERGEJOIN_183] (rows=704993 width=942) + Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_62] + SHUFFLE [RS_64] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_175] (rows=1568628 width=834) - Conds:RS_184._col2=RS_191._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_177] (rows=1568628 width=834) + Conds:RS_186._col2=RS_193._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_193] PartitionCols:_col0 - Select Operator [SEL_188] (rows=784314 width=4) + Select Operator [SEL_190] (rows=784314 width=4) Output:["_col0"] - Filter Operator [FIL_185] (rows=784314 width=90) + Filter Operator [FIL_187] (rows=784314 width=90) predicate:(ca_state = 'IL') TableScan [TS_3] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + SHUFFLE [RS_186] PartitionCols:_col2 - Select Operator [SEL_183] (rows=80000000 width=849) + Select Operator [SEL_185] (rows=80000000 width=849) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_182] (rows=80000000 width=849) + Filter Operator [FIL_184] (rows=80000000 width=849) predicate:c_current_addr_sk is not null TableScan [TS_0] (rows=80000000 width=849) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_63] + SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_58] (rows=704993 width=227) + Select Operator [SEL_60] (rows=704993 width=227) Output:["_col0","_col2"] - Filter Operator [FIL_57] (rows=704993 width=227) + Filter Operator [FIL_59] (rows=704993 width=227) predicate:(_col2 > _col3) - Merge Join Operator [MERGEJOIN_180] (rows=2114980 width=227) - Conds:RS_207._col1=RS_213._col1(Inner),Output:["_col0","_col2","_col3"] + Merge Join Operator [MERGEJOIN_182] (rows=2114980 width=227) + Conds:RS_209._col1=RS_215._col1(Inner),Output:["_col0","_col2","_col3"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + SHUFFLE [RS_215] PartitionCols:_col1 - Select Operator [SEL_212] (rows=6 width=198) + Select Operator [SEL_214] (rows=6 width=198) Output:["_col0","_col1"] - Filter Operator [FIL_211] (rows=6 width=206) + Filter Operator [FIL_213] (rows=6 width=206) predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_210] (rows=6 width=206) + Group By Operator [GBY_212] (rows=6 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 - Select Operator [SEL_209] (rows=2537976 width=201) + Select Operator [SEL_211] (rows=2537976 width=201) Output:["_col0","_col2"] - Group By Operator [GBY_208] (rows=2537976 width=201) + Group By Operator [GBY_210] (rows=2537976 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=3923529 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_179] (rows=3923529 width=184) - Conds:RS_40._col2=RS_193._col0(Inner),Output:["_col1","_col3","_col6"] + Group By Operator [GBY_46] (rows=3923529 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_181] (rows=3923529 width=184) + Conds:RS_195._col0=RS_43._col2(Inner),Output:["_col1","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_190] (rows=40000000 width=90) + Select Operator [SEL_192] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_187] (rows=40000000 width=90) + Filter Operator [FIL_189] (rows=40000000 width=90) predicate:ca_state is not null Please refer to the previous TableScan [TS_3] <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_43] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_178] (rows=3923529 width=101) - Conds:RS_199._col0=RS_203._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_180] (rows=3923529 width=101) + Conds:RS_201._col0=RS_205._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_197] (rows=13130761 width=118) + Select Operator [SEL_199] (rows=13130761 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_195] (rows=13130761 width=118) + Filter Operator [FIL_197] (rows=13130761 width=118) predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) - TableScan [TS_6] (rows=14398467 width=118) + TableScan [TS_9] (rows=14398467 width=118) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + SHUFFLE [RS_205] PartitionCols:_col0 - Select Operator [SEL_201] (rows=652 width=4) + Select Operator [SEL_203] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_200] (rows=652 width=8) + Filter Operator [FIL_202] (rows=652 width=8) predicate:(d_year = 2002) - TableScan [TS_9] (rows=73049 width=8) + TableScan [TS_12] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_209] PartitionCols:_col1 - Filter Operator [FIL_206] (rows=2114980 width=201) + Filter Operator [FIL_208] (rows=2114980 width=201) predicate:_col2 is not null - Select Operator [SEL_205] (rows=2114980 width=201) + Select Operator [SEL_207] (rows=2114980 width=201) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_204] (rows=2114980 width=201) + Group By Operator [GBY_206] (rows=2114980 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_23] + SHUFFLE [RS_24] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=3746772 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_177] (rows=3746772 width=184) - Conds:RS_18._col2=RS_192._col0(Inner),Output:["_col1","_col3","_col6"] + Group By Operator [GBY_23] (rows=3746772 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_179] (rows=3746772 width=184) + Conds:RS_194._col0=RS_20._col2(Inner),Output:["_col1","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_192] + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_189] (rows=40000000 width=90) + Select Operator [SEL_191] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=40000000 width=90) + Filter Operator [FIL_188] (rows=40000000 width=90) predicate:ca_state is not null Please refer to the previous TableScan [TS_3] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_18] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_176] (rows=3746772 width=101) - Conds:RS_198._col0=RS_202._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_178] (rows=3746772 width=101) + Conds:RS_200._col0=RS_204._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_196] (rows=12539214 width=118) + Select Operator [SEL_198] (rows=12539214 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_194] (rows=12539214 width=118) + Filter Operator [FIL_196] (rows=12539214 width=118) predicate:(wr_returning_customer_sk is not null and wr_returned_date_sk is not null and wr_returning_addr_sk is not null) - Please refer to the previous TableScan [TS_6] + Please refer to the previous TableScan [TS_9] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + SHUFFLE [RS_204] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_201] + Please refer to the previous Select Operator [SEL_203] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out index 6364d87393..fe9f7795ee 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query31.q.out @@ -113,376 +113,376 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 33 <- Reducer 13 (BROADCAST_EDGE) -Map 34 <- Reducer 17 (BROADCAST_EDGE) -Map 35 <- Reducer 23 (BROADCAST_EDGE) -Map 36 <- Reducer 27 (BROADCAST_EDGE) -Map 37 <- Reducer 31 (BROADCAST_EDGE) -Reducer 10 <- Map 33 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Map 32 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Map 19 <- Reducer 22 (BROADCAST_EDGE) +Map 33 <- Reducer 24 (BROADCAST_EDGE) +Map 34 <- Reducer 26 (BROADCAST_EDGE) +Map 35 <- Reducer 28 (BROADCAST_EDGE) +Map 36 <- Reducer 30 (BROADCAST_EDGE) +Map 37 <- Reducer 32 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 34 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 15 <- Map 32 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 35 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 19 <- Map 32 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 23 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 36 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 25 <- Map 32 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 37 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 29 <- Map 32 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 32 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 1 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 21 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) +Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 21 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 26 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 21 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 28 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 21 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 21 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 32 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 - File Output Operator [FS_139] - Select Operator [SEL_138] (rows=110 width=550) + Reducer 6 + File Output Operator [FS_145] + Select Operator [SEL_144] (rows=110 width=550) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_136] (rows=110 width=778) + Filter Operator [FIL_142] (rows=110 width=778) predicate:(CASE WHEN ((_col9 > 0)) THEN (CASE WHEN (_col7) THEN (((_col4 / _col6) > (_col13 / _col9))) ELSE (false) END) ELSE (false) END and CASE WHEN ((_col11 > 0)) THEN (CASE WHEN (_col2) THEN (((_col6 / _col1) > (_col9 / _col11))) ELSE (false) END) ELSE (false) END) - Merge Join Operator [MERGEJOIN_450] (rows=440 width=778) - Conds:RS_133._col0=RS_134._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col9","_col11","_col13"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_134] + Merge Join Operator [MERGEJOIN_456] (rows=440 width=778) + Conds:RS_139._col0=RS_140._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col9","_col11","_col13"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_140] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_448] (rows=1605 width=434) - Conds:RS_123._col0=RS_538._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_123] + Merge Join Operator [MERGEJOIN_454] (rows=1605 width=434) + Conds:RS_129._col0=RS_544._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_129] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_447] (rows=1605 width=322) - Conds:RS_524._col0=RS_531._col0(Inner),Output:["_col0","_col1","_col3"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_524] + Merge Join Operator [MERGEJOIN_453] (rows=1605 width=322) + Conds:RS_530._col0=RS_537._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_530] PartitionCols:_col0 - Group By Operator [GBY_523] (rows=1605 width=210) + Group By Operator [GBY_529] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_77] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_81] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=33705 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_441] (rows=37399561 width=139) - Conds:RS_72._col1=RS_497._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_497] + Group By Operator [GBY_80] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_447] (rows=37399561 width=139) + Conds:RS_462._col0=RS_77._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_462] PartitionCols:_col0 - Select Operator [SEL_493] (rows=40000000 width=102) + Select Operator [SEL_458] (rows=40000000 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_492] (rows=40000000 width=102) + Filter Operator [FIL_457] (rows=40000000 width=102) predicate:ca_county is not null - TableScan [TS_6] (rows=40000000 width=102) + TableScan [TS_0] (rows=40000000 width=102) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_72] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_77] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_440] (rows=37399561 width=42) - Conds:RS_522._col0=RS_469._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_469] + Merge Join Operator [MERGEJOIN_446] (rows=37399561 width=42) + Conds:RS_528._col0=RS_483._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_483] PartitionCols:_col0 - Select Operator [SEL_460] (rows=130 width=4) + Select Operator [SEL_474] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_454] (rows=130 width=12) + Filter Operator [FIL_468] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2)) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_522] + SHUFFLE [RS_528] PartitionCols:_col0 - Select Operator [SEL_521] (rows=525327191 width=114) + Select Operator [SEL_527] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_520] (rows=525327191 width=114) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_60] (rows=575995635 width=114) + Filter Operator [FIL_526] (rows=525327191 width=114) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_66] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_519] - Group By Operator [GBY_518] (rows=1 width=12) + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_525] + Group By Operator [GBY_524] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_484] - Group By Operator [GBY_478] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_498] + Group By Operator [GBY_492] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_470] (rows=130 width=4) + Select Operator [SEL_484] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_460] - <-Reducer 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_531] + Please refer to the previous Select Operator [SEL_474] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_537] PartitionCols:_col0 - Group By Operator [GBY_530] (rows=1605 width=210) + Group By Operator [GBY_536] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_97] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_102] PartitionCols:_col0 - Group By Operator [GBY_96] (rows=33705 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_443] (rows=37399561 width=139) - Conds:RS_92._col1=RS_498._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_498] + Group By Operator [GBY_101] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_449] (rows=37399561 width=139) + Conds:RS_463._col0=RS_98._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_463] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_92] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_98] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_442] (rows=37399561 width=42) - Conds:RS_529._col0=RS_471._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_471] + Merge Join Operator [MERGEJOIN_448] (rows=37399561 width=42) + Conds:RS_535._col0=RS_485._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_485] PartitionCols:_col0 - Select Operator [SEL_461] (rows=130 width=4) + Select Operator [SEL_475] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_455] (rows=130 width=12) + Filter Operator [FIL_469] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 1)) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_529] + SHUFFLE [RS_535] PartitionCols:_col0 - Select Operator [SEL_528] (rows=525327191 width=114) + Select Operator [SEL_534] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_527] (rows=525327191 width=114) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_80] (rows=575995635 width=114) + Filter Operator [FIL_533] (rows=525327191 width=114) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_94_date_dim_d_date_sk_min) AND DynamicValue(RS_94_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_94_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_87] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_526] - Group By Operator [GBY_525] (rows=1 width=12) + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_532] + Group By Operator [GBY_531] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_485] - Group By Operator [GBY_479] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_499] + Group By Operator [GBY_493] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_472] (rows=130 width=4) + Select Operator [SEL_486] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_461] - <-Reducer 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_538] + Please refer to the previous Select Operator [SEL_475] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_544] PartitionCols:_col0 - Group By Operator [GBY_537] (rows=1605 width=210) + Group By Operator [GBY_543] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_117] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_123] PartitionCols:_col0 - Group By Operator [GBY_116] (rows=33705 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_445] (rows=37399561 width=139) - Conds:RS_112._col1=RS_499._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_499] + Group By Operator [GBY_122] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_451] (rows=37399561 width=139) + Conds:RS_464._col0=RS_119._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_464] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_112] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_119] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_444] (rows=37399561 width=42) - Conds:RS_536._col0=RS_473._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_473] + Merge Join Operator [MERGEJOIN_450] (rows=37399561 width=42) + Conds:RS_542._col0=RS_487._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_487] PartitionCols:_col0 - Select Operator [SEL_462] (rows=130 width=4) + Select Operator [SEL_476] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_456] (rows=130 width=12) + Filter Operator [FIL_470] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 3)) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_536] + SHUFFLE [RS_542] PartitionCols:_col0 - Select Operator [SEL_535] (rows=525327191 width=114) + Select Operator [SEL_541] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_534] (rows=525327191 width=114) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_110_date_dim_d_date_sk_min) AND DynamicValue(RS_110_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_110_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_100] (rows=575995635 width=114) + Filter Operator [FIL_540] (rows=525327191 width=114) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_115_date_dim_d_date_sk_min) AND DynamicValue(RS_115_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_115_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_108] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_533] - Group By Operator [GBY_532] (rows=1 width=12) + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_539] + Group By Operator [GBY_538] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_486] - Group By Operator [GBY_480] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_500] + Group By Operator [GBY_494] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_474] (rows=130 width=4) + Select Operator [SEL_488] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_462] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_133] + Please refer to the previous Select Operator [SEL_476] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_139] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_449] (rows=440 width=442) - Conds:RS_130._col0=RS_517._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col6","_col7"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_517] + Merge Join Operator [MERGEJOIN_455] (rows=440 width=442) + Conds:RS_136._col0=RS_523._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col6","_col7"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_523] PartitionCols:_col0 - Select Operator [SEL_516] (rows=440 width=214) + Select Operator [SEL_522] (rows=440 width=214) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_515] (rows=440 width=210) + Group By Operator [GBY_521] (rows=440 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_57] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_60] PartitionCols:_col0 - Group By Operator [GBY_56] (rows=3960 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_439] (rows=10246882 width=209) - Conds:RS_52._col1=RS_496._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_496] + Group By Operator [GBY_59] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_445] (rows=10246882 width=209) + Conds:RS_461._col0=RS_56._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_461] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_438] (rows=10246882 width=115) - Conds:RS_514._col0=RS_467._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_467] + Merge Join Operator [MERGEJOIN_444] (rows=10246882 width=115) + Conds:RS_520._col0=RS_481._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_481] PartitionCols:_col0 - Select Operator [SEL_459] (rows=130 width=4) + Select Operator [SEL_473] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_453] (rows=130 width=12) + Filter Operator [FIL_467] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2)) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_514] + SHUFFLE [RS_520] PartitionCols:_col0 - Select Operator [SEL_513] (rows=143931246 width=119) + Select Operator [SEL_519] (rows=143931246 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_512] (rows=143931246 width=119) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=144002668 width=119) + Filter Operator [FIL_518] (rows=143931246 width=119) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_511] - Group By Operator [GBY_510] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_517] + Group By Operator [GBY_516] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_483] - Group By Operator [GBY_477] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_497] + Group By Operator [GBY_491] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_468] (rows=130 width=4) + Select Operator [SEL_482] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_459] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_130] + Please refer to the previous Select Operator [SEL_473] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_136] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_446] (rows=440 width=326) - Conds:RS_502._col0=RS_509._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_509] - PartitionCols:_col0 - Group By Operator [GBY_508] (rows=440 width=210) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0 - Group By Operator [GBY_36] (rows=3960 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_437] (rows=10246882 width=209) - Conds:RS_32._col1=RS_495._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_495] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_436] (rows=10246882 width=115) - Conds:RS_507._col0=RS_465._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_465] - PartitionCols:_col0 - Select Operator [SEL_458] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_452] (rows=130 width=12) - predicate:((d_year = 2000) and (d_qoy = 3)) - Please refer to the previous TableScan [TS_3] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] - PartitionCols:_col0 - Select Operator [SEL_506] (rows=143931246 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_505] (rows=143931246 width=119) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=144002668 width=119) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_504] - Group By Operator [GBY_503] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_482] - Group By Operator [GBY_476] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_466] (rows=130 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_458] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_502] + Merge Join Operator [MERGEJOIN_452] (rows=440 width=326) + Conds:RS_508._col0=RS_515._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_508] PartitionCols:_col0 - Select Operator [SEL_501] (rows=440 width=214) + Select Operator [SEL_507] (rows=440 width=214) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_500] (rows=440 width=210) + Group By Operator [GBY_506] (rows=440 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=3960 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_435] (rows=10246882 width=209) - Conds:RS_12._col1=RS_494._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_494] + Group By Operator [GBY_17] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_441] (rows=10246882 width=209) + Conds:RS_459._col0=RS_14._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_459] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_434] (rows=10246882 width=115) - Conds:RS_491._col0=RS_463._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_463] + Merge Join Operator [MERGEJOIN_440] (rows=10246882 width=115) + Conds:RS_505._col0=RS_477._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_477] PartitionCols:_col0 - Select Operator [SEL_457] (rows=130 width=4) + Select Operator [SEL_471] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_451] (rows=130 width=12) + Filter Operator [FIL_465] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 1)) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_491] + Please refer to the previous TableScan [TS_6] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_505] PartitionCols:_col0 - Select Operator [SEL_490] (rows=143931246 width=119) + Select Operator [SEL_504] (rows=143931246 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_489] (rows=143931246 width=119) + Filter Operator [FIL_503] (rows=143931246 width=119) predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + TableScan [TS_3] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_488] - Group By Operator [GBY_487] (rows=1 width=12) + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_502] + Group By Operator [GBY_501] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_481] - Group By Operator [GBY_475] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_495] + Group By Operator [GBY_489] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_464] (rows=130 width=4) + Select Operator [SEL_478] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_457] + Please refer to the previous Select Operator [SEL_471] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_515] + PartitionCols:_col0 + Group By Operator [GBY_514] (rows=440 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Group By Operator [GBY_38] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_443] (rows=10246882 width=209) + Conds:RS_460._col0=RS_35._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_460] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_458] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_442] (rows=10246882 width=115) + Conds:RS_513._col0=RS_479._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_479] + PartitionCols:_col0 + Select Operator [SEL_472] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_466] (rows=130 width=12) + predicate:((d_year = 2000) and (d_qoy = 3)) + Please refer to the previous TableScan [TS_6] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_513] + PartitionCols:_col0 + Select Operator [SEL_512] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_511] (rows=143931246 width=119) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_510] + Group By Operator [GBY_509] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_496] + Group By Operator [GBY_490] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_480] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_472] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out index bba2eb4d79..b78eb9ecce 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out @@ -163,27 +163,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 18 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Map 27 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 20 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -191,226 +191,220 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_359] - Limit [LIM_358] (rows=59 width=115) + File Output Operator [FS_362] + Limit [LIM_361] (rows=59 width=115) Number of rows:100 - Select Operator [SEL_357] (rows=59 width=115) + Select Operator [SEL_360] (rows=59 width=115) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_356] - Top N Key Operator [TNK_355] (rows=59 width=115) + SHUFFLE [RS_359] + Top N Key Operator [TNK_358] (rows=59 width=115) keys:_col1,top n:100 - Group By Operator [GBY_354] (rows=59 width=115) + Group By Operator [GBY_357] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_375] + Reduce Output Operator [RS_378] PartitionCols:_col0 - Group By Operator [GBY_374] (rows=59 width=115) + Group By Operator [GBY_377] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_373] (rows=19 width=115) + Group By Operator [GBY_376] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_109] + SHUFFLE [RS_112] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=19 width=115) + Group By Operator [GBY_111] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=11364 width=3) - Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_308] (rows=11364 width=3) + Conds:RS_107._col0=RS_108._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_104] + SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_294] (rows=461514 width=7) - Conds:RS_320._col1=RS_326._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_297] (rows=461514 width=7) + Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_323] PartitionCols:_col1 - Select Operator [SEL_319] (rows=460848 width=7) + Select Operator [SEL_322] (rows=460848 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_318] (rows=460848 width=7) + Filter Operator [FIL_321] (rows=460848 width=7) predicate:i_manufact_id is not null TableScan [TS_0] (rows=462000 width=7) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + SHUFFLE [RS_329] PartitionCols:_col0 - Group By Operator [GBY_325] (rows=692 width=3) + Group By Operator [GBY_328] (rows=692 width=3) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + SHUFFLE [RS_327] PartitionCols:_col0 - Group By Operator [GBY_323] (rows=692 width=3) + Group By Operator [GBY_326] (rows=692 width=3) Output:["_col0"],keys:i_manufact_id - Select Operator [SEL_322] (rows=46085 width=93) + Select Operator [SEL_325] (rows=46085 width=93) Output:["i_manufact_id"] - Filter Operator [FIL_321] (rows=46085 width=93) + Filter Operator [FIL_324] (rows=46085 width=93) predicate:((i_category = 'Books') and i_manufact_id is not null) TableScan [TS_3] (rows=462000 width=93) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_manufact_id"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_108] PartitionCols:_col2 - Select Operator [SEL_100] (rows=788222 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_302] (rows=788222 width=110) - Conds:RS_97._col2=RS_350._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] - PartitionCols:_col0 - Select Operator [SEL_347] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_346] (rows=8000000 width=112) - predicate:(ca_gmt_offset = -6) - TableScan [TS_16] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_97] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_301] (rows=3941109 width=118) - Conds:RS_372._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - PartitionCols:_col0 - Select Operator [SEL_328] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_327] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 3)) - TableScan [TS_13] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_372] - PartitionCols:_col0 - Select Operator [SEL_371] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_370] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_85] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_369] - Group By Operator [GBY_368] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_334] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_305] (rows=788222 width=110) + Conds:RS_334._col0=RS_101._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_330] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -6) + TableScan [TS_10] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_304] (rows=3941109 width=118) + Conds:RS_375._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + PartitionCols:_col0 + Select Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_335] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 3)) + TableScan [TS_16] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_375] + PartitionCols:_col0 + Select Operator [SEL_374] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_373] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_90] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_372] + Group By Operator [GBY_371] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_342] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_353] + Reduce Output Operator [RS_356] PartitionCols:_col0 - Group By Operator [GBY_352] (rows=59 width=115) + Group By Operator [GBY_355] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_351] (rows=64 width=115) + Group By Operator [GBY_354] (rows=64 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_34] + SHUFFLE [RS_35] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=64 width=115) + Group By Operator [GBY_34] (rows=64 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=41476 width=3) - Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_306] (rows=41476 width=3) + Conds:RS_30._col0=RS_31._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col2 - Select Operator [SEL_25] (rows=2876890 width=4) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_296] (rows=2876890 width=4) - Conds:RS_22._col2=RS_348._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_347] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_295] (rows=14384447 width=4) - Conds:RS_345._col0=RS_329._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_328] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] - PartitionCols:_col0 - Select Operator [SEL_344] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_343] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_342] - Group By Operator [GBY_341] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_335] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_330] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_299] (rows=2876890 width=4) + Conds:RS_332._col0=RS_24._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_298] (rows=14384447 width=4) + Conds:RS_353._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] + PartitionCols:_col0 + Select Operator [SEL_352] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_351] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_13] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_350] + Group By Operator [GBY_349] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_346] + Group By Operator [GBY_343] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_338] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_367] + Reduce Output Operator [RS_370] PartitionCols:_col0 - Group By Operator [GBY_366] (rows=59 width=115) + Group By Operator [GBY_369] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_365] (rows=35 width=115) + Group By Operator [GBY_368] (rows=35 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_71] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=35 width=115) + Group By Operator [GBY_72] (rows=35 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=22352 width=3) - Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_307] (rows=22352 width=3) + Conds:RS_68._col0=RS_69._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_66] + SHUFFLE [RS_68] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_69] PartitionCols:_col3 - Select Operator [SEL_62] (rows=1550375 width=13) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_299] (rows=1550375 width=13) - Conds:RS_59._col1=RS_349._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_347] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=7751872 width=98) - Conds:RS_364._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_328] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_364] - PartitionCols:_col0 - Select Operator [SEL_363] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_362] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_47] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_361] - Group By Operator [GBY_360] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Group By Operator [GBY_336] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_332] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_302] (rows=1550375 width=13) + Conds:RS_333._col0=RS_62._col1(Inner),Output:["_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_301] (rows=7751872 width=98) + Conds:RS_367._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_367] + PartitionCols:_col0 + Select Operator [SEL_366] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_365] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_51] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_364] + Group By Operator [GBY_363] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_340] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out index a5a8df3d6d..1c0c293618 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query38.q.out @@ -57,203 +57,203 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 19 <- Reducer 13 (BROADCAST_EDGE) -Map 20 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Map 19 <- Reducer 16 (BROADCAST_EDGE) +Map 20 <- Reducer 18 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 16 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 13 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 18 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_230] - Group By Operator [GBY_229] (rows=1 width=8) + Reducer 6 vectorized + File Output Operator [FS_233] + Group By Operator [GBY_232] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_227] (rows=1 width=8) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_231] + Group By Operator [GBY_230] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_226] (rows=1 width=8) - Filter Operator [FIL_225] (rows=1 width=8) + Select Operator [SEL_229] (rows=1 width=8) + Filter Operator [FIL_228] (rows=1 width=8) predicate:(_col3 = 3L) - Select Operator [SEL_224] (rows=165330890 width=8) + Select Operator [SEL_227] (rows=165330890 width=8) Output:["_col3"] - Group By Operator [GBY_223] (rows=165330890 width=282) + Group By Operator [GBY_226] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] vectorized - Reduce Output Operator [RS_240] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] vectorized + Reduce Output Operator [RS_253] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_239] (rows=165330890 width=282) + Group By Operator [GBY_252] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_238] (rows=49146883 width=282) + Group By Operator [GBY_251] (rows=24986582 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_237] (rows=49146883 width=274) + Select Operator [SEL_250] (rows=24986582 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_236] (rows=49146883 width=274) + Group By Operator [GBY_249] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_68] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_39] (rows=49146883 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_174] (rows=49146883 width=274) - Conds:RS_35._col1=RS_216._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + Group By Operator [GBY_67] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_179] (rows=24986582 width=274) + Conds:RS_201._col0=RS_64._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_214] (rows=80000000 width=184) + Select Operator [SEL_198] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - TableScan [TS_6] (rows=80000000 width=184) + TableScan [TS_0] (rows=80000000 width=184) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_35] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_173] (rows=49146883 width=97) - Conds:RS_235._col0=RS_199._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_199] + Merge Join Operator [MERGEJOIN_178] (rows=24986582 width=97) + Conds:RS_248._col0=RS_208._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_208] PartitionCols:_col0 - Select Operator [SEL_196] (rows=317 width=98) + Select Operator [SEL_203] (rows=317 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_195] (rows=317 width=102) + Filter Operator [FIL_202] (rows=317 width=102) predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_3] (rows=73049 width=102) + TableScan [TS_5] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] PartitionCols:_col0 - Select Operator [SEL_234] (rows=285117831 width=7) + Select Operator [SEL_247] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_233] (rows=285117831 width=7) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_24] (rows=287989836 width=7) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_232] - Group By Operator [GBY_231] (rows=1 width=12) + Filter Operator [FIL_246] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_60_date_dim_d_date_sk_min) AND DynamicValue(RS_60_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_60_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_53] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_245] + Group By Operator [GBY_244] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_207] - Group By Operator [GBY_204] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_215] + Group By Operator [GBY_212] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_200] (rows=317 width=4) + Select Operator [SEL_209] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_196] - <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_250] + Please refer to the previous Select Operator [SEL_203] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_225] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_249] (rows=165330890 width=282) + Group By Operator [GBY_224] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_248] (rows=24986582 width=282) + Group By Operator [GBY_223] (rows=91197425 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_247] (rows=24986582 width=274) + Select Operator [SEL_222] (rows=91197425 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_246] (rows=24986582 width=274) + Group By Operator [GBY_221] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_65] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_64] (rows=24986582 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_176] (rows=24986582 width=274) - Conds:RS_60._col1=RS_217._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + Group By Operator [GBY_16] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_175] (rows=91197425 width=274) + Conds:RS_199._col0=RS_13._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_214] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_60] + Please refer to the previous Select Operator [SEL_198] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_175] (rows=24986582 width=97) - Conds:RS_245._col0=RS_201._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_174] (rows=91197425 width=96) + Conds:RS_220._col0=RS_204._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_204] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_196] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] + Please refer to the previous Select Operator [SEL_203] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_244] (rows=143930993 width=7) + Select Operator [SEL_219] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_243] (rows=143930993 width=7) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_49] (rows=144002668 width=7) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_242] - Group By Operator [GBY_241] (rows=1 width=12) + Filter Operator [FIL_218] (rows=525327388 width=7) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_217] + Group By Operator [GBY_216] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_208] - Group By Operator [GBY_205] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_213] + Group By Operator [GBY_210] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_202] (rows=317 width=4) + Select Operator [SEL_205] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_196] - <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_222] + Please refer to the previous Select Operator [SEL_203] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_243] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_221] (rows=165330890 width=282) + Group By Operator [GBY_242] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_220] (rows=91197425 width=282) + Group By Operator [GBY_241] (rows=49146883 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_219] (rows=91197425 width=274) + Select Operator [SEL_240] (rows=49146883 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_218] (rows=91197425 width=274) + Group By Operator [GBY_239] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=91197425 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_172] (rows=91197425 width=274) - Conds:RS_11._col1=RS_215._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + Group By Operator [GBY_41] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_177] (rows=49146883 width=274) + Conds:RS_200._col0=RS_38._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_214] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] + Please refer to the previous Select Operator [SEL_198] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_171] (rows=91197425 width=96) - Conds:RS_213._col0=RS_197._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_197] + Merge Join Operator [MERGEJOIN_176] (rows=49146883 width=97) + Conds:RS_238._col0=RS_206._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_206] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_196] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + Please refer to the previous Select Operator [SEL_203] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_212] (rows=525327388 width=7) + Select Operator [SEL_237] (rows=285117831 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_211] (rows=525327388 width=7) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_210] - Group By Operator [GBY_209] (rows=1 width=12) + Filter Operator [FIL_236] (rows=285117831 width=7) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_27] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_206] - Group By Operator [GBY_203] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_214] + Group By Operator [GBY_211] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=317 width=4) + Select Operator [SEL_207] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_196] + Please refer to the previous Select Operator [SEL_203] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out index 5f5322c38b..11ed960a3c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out @@ -229,397 +229,399 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 35 (BROADCAST_EDGE) -Map 11 <- Reducer 36 (BROADCAST_EDGE) -Map 15 <- Reducer 37 (BROADCAST_EDGE) -Map 19 <- Reducer 34 (BROADCAST_EDGE) -Map 23 <- Reducer 33 (BROADCAST_EDGE) -Map 27 <- Reducer 32 (BROADCAST_EDGE) +Map 20 <- Reducer 23 (BROADCAST_EDGE) +Map 34 <- Reducer 25 (BROADCAST_EDGE) +Map 35 <- Reducer 27 (BROADCAST_EDGE) +Map 36 <- Reducer 29 (BROADCAST_EDGE) +Map 37 <- Reducer 31 (BROADCAST_EDGE) +Map 38 <- Reducer 33 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 13 <- Map 38 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 17 <- Map 38 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 21 <- Map 38 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 25 <- Map 38 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 29 <- Map 38 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 38 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) -Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 26 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 30 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 1 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 1 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 25 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 22 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 27 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 22 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 29 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 22 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 31 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 22 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) +Reducer 33 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_563] - Limit [LIM_562] (rows=100 width=85) + Reducer 5 vectorized + File Output Operator [FS_566] + Limit [LIM_565] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_561] (rows=7323197 width=85) + Select Operator [SEL_564] (rows=7323197 width=85) Output:["_col0"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_135] - Select Operator [SEL_134] (rows=7323197 width=85) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_144] + Select Operator [SEL_143] (rows=7323197 width=85) Output:["_col0"] - Top N Key Operator [TNK_250] (rows=7323197 width=537) - keys:_col13,top n:100 - Filter Operator [FIL_133] (rows=7323197 width=537) - predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col7) THEN (((_col9 / _col6) > (_col14 / _col4))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_467] (rows=14646395 width=537) - Conds:RS_130._col3=RS_560._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col13","_col14"] - <-Reducer 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_560] + Top N Key Operator [TNK_259] (rows=7323197 width=537) + keys:_col1,top n:100 + Filter Operator [FIL_142] (rows=7323197 width=537) + predicate:CASE WHEN (_col11 is not null) THEN (CASE WHEN (_col14) THEN (((_col6 / _col13) > (_col2 / _col11))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_470] (rows=14646395 width=537) + Conds:RS_517._col0=RS_140._col7(Inner),Output:["_col1","_col2","_col6","_col11","_col13","_col14"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_517] PartitionCols:_col0 - Select Operator [SEL_559] (rows=80000000 width=297) + Select Operator [SEL_516] (rows=80000000 width=297) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_558] (rows=80000000 width=764) + Group By Operator [GBY_515] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_114] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_113] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_462] (rows=187573258 width=764) - Conds:RS_109._col1=RS_506._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_506] + Group By Operator [GBY_16] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_455] (rows=187573258 width=764) + Conds:RS_472._col0=RS_13._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_472] PartitionCols:_col0 - Select Operator [SEL_505] (rows=80000000 width=656) + Select Operator [SEL_471] (rows=80000000 width=656) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - TableScan [TS_104] (rows=80000000 width=656) + TableScan [TS_0] (rows=80000000 width=656) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_109] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_461] (rows=187573258 width=115) - Conds:RS_557._col0=RS_476._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_476] + Merge Join Operator [MERGEJOIN_454] (rows=187573258 width=115) + Conds:RS_514._col0=RS_486._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_486] PartitionCols:_col0 - Select Operator [SEL_472] (rows=652 width=4) + Select Operator [SEL_482] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_468] (rows=652 width=8) + Filter Operator [FIL_478] (rows=652 width=8) predicate:(d_year = 2002) - TableScan [TS_101] (rows=73049 width=8) + TableScan [TS_5] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_557] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_514] PartitionCols:_col0 - Select Operator [SEL_556] (rows=525327388 width=119) + Select Operator [SEL_513] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_555] (rows=525327388 width=435) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_107_date_dim_d_date_sk_min) AND DynamicValue(RS_107_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_107_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_98] (rows=575995635 width=435) + Filter Operator [FIL_512] (rows=525327388 width=435) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=575995635 width=435) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_554] - Group By Operator [GBY_553] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_511] + Group By Operator [GBY_510] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_494] - Group By Operator [GBY_488] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_504] + Group By Operator [GBY_498] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_477] (rows=652 width=4) + Select Operator [SEL_487] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_472] + Please refer to the previous Select Operator [SEL_482] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_130] - PartitionCols:_col3 - Filter Operator [FIL_129] (rows=12248093 width=668) - predicate:CASE WHEN (_col2) THEN (CASE WHEN (_col7) THEN (((_col9 / _col6) > (_col11 / _col1))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_466] (rows=24496187 width=668) - Conds:RS_126._col3=RS_552._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col11"] - <-Reducer 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_552] - PartitionCols:_col0 - Select Operator [SEL_551] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_550] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_94] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_460] (rows=51391963 width=764) - Conds:RS_90._col1=RS_507._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_90] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_459] (rows=51391963 width=115) - Conds:RS_549._col0=RS_478._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_478] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_472] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_549] - PartitionCols:_col0 - Select Operator [SEL_548] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_547] (rows=143930993 width=455) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_88_date_dim_d_date_sk_min) AND DynamicValue(RS_88_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_88_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_79] (rows=144002668 width=455) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_546] - Group By Operator [GBY_545] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_495] - Group By Operator [GBY_489] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_479] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_472] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_126] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_465] (rows=20485012 width=556) - Conds:RS_123._col3=RS_544._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_544] - PartitionCols:_col0 - Select Operator [SEL_543] (rows=80000000 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_542] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_75] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_458] (rows=101084444 width=764) - Conds:RS_71._col1=RS_508._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_508] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_457] (rows=101084444 width=115) - Conds:RS_541._col0=RS_480._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_480] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_472] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_541] - PartitionCols:_col0 - Select Operator [SEL_540] (rows=285117831 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_539] (rows=285117831 width=453) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_69_date_dim_d_date_sk_min) AND DynamicValue(RS_69_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_69_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_60] (rows=287989836 width=453) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_538] - Group By Operator [GBY_537] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_496] - Group By Operator [GBY_490] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_481] (rows=652 width=4) + SHUFFLE [RS_140] + PartitionCols:_col7 + Select Operator [SEL_138] (rows=12248093 width=668) + Output:["_col3","_col7","_col8","_col10","_col11"] + Filter Operator [FIL_137] (rows=12248093 width=668) + predicate:CASE WHEN (_col6) THEN (CASE WHEN (_col11) THEN (((_col3 / _col10) > (_col1 / _col5))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_469] (rows=24496187 width=668) + Conds:RS_525._col0=RS_135._col5(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col10","_col11"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_135] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_468] (rows=20485012 width=556) + Conds:RS_533._col0=RS_131._col3(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col9"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_533] + PartitionCols:_col0 + Select Operator [SEL_532] (rows=80000000 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_531] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_56] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_459] (rows=101084444 width=764) + Conds:RS_474._col0=RS_53._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_474] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_471] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_458] (rows=101084444 width=115) + Conds:RS_530._col0=RS_490._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_490] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_482] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_530] + PartitionCols:_col0 + Select Operator [SEL_529] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_528] (rows=285117831 width=453) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_42] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_527] + Group By Operator [GBY_526] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_506] + Group By Operator [GBY_500] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_491] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_482] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_131] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_467] (rows=17130654 width=444) + Conds:RS_126._col3=RS_563._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_126] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_466] (rows=17130654 width=328) + Conds:RS_543._col0=RS_553._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_543] + PartitionCols:_col0 + Select Operator [SEL_542] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_541] (rows=17130654 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_540] (rows=51391963 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_539] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_76] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_461] (rows=51391963 width=764) + Conds:RS_475._col0=RS_73._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_475] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_471] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_460] (rows=51391963 width=115) + Conds:RS_538._col0=RS_492._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_492] + PartitionCols:_col0 + Select Operator [SEL_483] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_472] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_123] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_464] (rows=17130654 width=444) - Conds:RS_120._col3=RS_536._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7"] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_536] - PartitionCols:_col0 - Select Operator [SEL_535] (rows=26666666 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_534] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_533] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_532] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_55] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_456] (rows=101084444 width=764) - Conds:RS_51._col1=RS_511._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_511] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] + Filter Operator [FIL_479] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_5] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_538] + PartitionCols:_col0 + Select Operator [SEL_537] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_536] (rows=143930993 width=455) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_69_date_dim_d_date_sk_min) AND DynamicValue(RS_69_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_69_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_62] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_535] + Group By Operator [GBY_534] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_507] + Group By Operator [GBY_501] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_493] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_483] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_553] + PartitionCols:_col0 + Select Operator [SEL_552] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_551] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_550] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_549] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_455] (rows=101084444 width=115) - Conds:RS_531._col0=RS_486._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_486] - PartitionCols:_col0 - Select Operator [SEL_475] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_471] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_101] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_531] - PartitionCols:_col0 - Select Operator [SEL_530] (rows=285117831 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_529] (rows=285117831 width=453) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=287989836 width=453) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_528] - Group By Operator [GBY_527] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_499] - Group By Operator [GBY_493] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_487] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_475] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_120] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_463] (rows=17130654 width=328) - Conds:RS_516._col0=RS_526._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_526] - PartitionCols:_col0 - Select Operator [SEL_525] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_524] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_523] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_522] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_35] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_454] (rows=187573258 width=764) - Conds:RS_31._col1=RS_510._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_453] (rows=187573258 width=115) - Conds:RS_521._col0=RS_484._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_484] - PartitionCols:_col0 - Select Operator [SEL_474] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_470] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_101] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_521] - PartitionCols:_col0 - Select Operator [SEL_520] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_519] (rows=525327388 width=435) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=435) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_518] - Group By Operator [GBY_517] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_498] - Group By Operator [GBY_492] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_485] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_474] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_516] - PartitionCols:_col0 - Select Operator [SEL_515] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_514] (rows=17130654 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_513] (rows=51391963 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_512] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_15] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_452] (rows=51391963 width=764) - Conds:RS_11._col1=RS_509._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_509] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_505] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_451] (rows=51391963 width=115) - Conds:RS_504._col0=RS_482._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_482] - PartitionCols:_col0 - Select Operator [SEL_473] (rows=652 width=4) + SHUFFLE [RS_98] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_97] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_463] (rows=187573258 width=764) + Conds:RS_476._col0=RS_94._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_476] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_471] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_462] (rows=187573258 width=115) + Conds:RS_548._col0=RS_494._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_494] + PartitionCols:_col0 + Select Operator [SEL_484] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_480] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_5] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_548] + PartitionCols:_col0 + Select Operator [SEL_547] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_546] (rows=525327388 width=435) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_83] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_545] + Group By Operator [GBY_544] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_508] + Group By Operator [GBY_502] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_495] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_484] + <-Reducer 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_563] + PartitionCols:_col0 + Select Operator [SEL_562] (rows=26666666 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_561] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_560] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_559] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_119] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_118] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_465] (rows=101084444 width=764) + Conds:RS_477._col0=RS_115._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_477] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_471] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_115] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_464] (rows=101084444 width=115) + Conds:RS_558._col0=RS_496._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_496] + PartitionCols:_col0 + Select Operator [SEL_485] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_481] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_5] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_558] + PartitionCols:_col0 + Select Operator [SEL_557] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_556] (rows=285117831 width=453) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_111_date_dim_d_date_sk_min) AND DynamicValue(RS_111_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_111_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_104] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_555] + Group By Operator [GBY_554] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_509] + Group By Operator [GBY_503] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_497] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_485] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_525] + PartitionCols:_col0 + Select Operator [SEL_524] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_523] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_36] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_457] (rows=51391963 width=764) + Conds:RS_473._col0=RS_33._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_473] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_471] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_456] (rows=51391963 width=115) + Conds:RS_522._col0=RS_488._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_488] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_482] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_522] + PartitionCols:_col0 + Select Operator [SEL_521] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_520] (rows=143930993 width=455) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_22] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_519] + Group By Operator [GBY_518] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_505] + Group By Operator [GBY_499] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_489] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_469] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_101] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_504] - PartitionCols:_col0 - Select Operator [SEL_503] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_502] (rows=143930993 width=455) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=455) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_501] - Group By Operator [GBY_500] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_497] - Group By Operator [GBY_491] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_483] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_473] + Please refer to the previous Select Operator [SEL_482] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out index a44ad592d7..05925e23f3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out @@ -67,105 +67,105 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Map 5 <- Reducer 12 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_127] - Limit [LIM_126] (rows=100 width=410) + Reducer 4 vectorized + File Output Operator [FS_128] + Limit [LIM_127] (rows=100 width=410) Number of rows:100 - Select Operator [SEL_125] (rows=769995 width=410) + Select Operator [SEL_126] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_124] - Group By Operator [GBY_123] (rows=769995 width=410) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_28] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=5757278 width=410) + Group By Operator [GBY_28] (rows=5757278 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=5757278 width=278) + Select Operator [SEL_26] (rows=5757278 width=278) Output:["_col0","_col1","_col2","_col3"] - Top N Key Operator [TNK_56] (rows=5757278 width=278) - keys:_col14, _col12,top n:100 - Merge Join Operator [MERGEJOIN_104] (rows=5757278 width=278) - Conds:RS_22._col1=RS_122._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] + Top N Key Operator [TNK_57] (rows=5757278 width=278) + keys:_col1, _col14,top n:100 + Merge Join Operator [MERGEJOIN_105] (rows=5757278 width=278) + Conds:RS_107._col0=RS_24._col1(Inner),Output:["_col1","_col6","_col9","_col11","_col12","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_121] (rows=27 width=90) + Select Operator [SEL_106] (rows=27 width=90) Output:["_col0","_col1"] - TableScan [TS_11] (rows=27 width=90) + TableScan [TS_0] (rows=27 width=90) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_22] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_103] (rows=5757278 width=195) - Conds:RS_19._col2=RS_107._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_104] (rows=5757278 width=195) + Conds:RS_19._col2=RS_110._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_106] (rows=51333 width=104) + Select Operator [SEL_109] (rows=51333 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_105] (rows=51333 width=215) + Filter Operator [FIL_108] (rows=51333 width=215) predicate:i_current_price BETWEEN 0.99 AND 1.49 - TableScan [TS_8] (rows=462000 width=215) + TableScan [TS_10] (rows=462000 width=215) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_102] (rows=51815831 width=124) - Conds:RS_16._col0=RS_120._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + Merge Join Operator [MERGEJOIN_103] (rows=51815831 width=124) + Conds:RS_16._col0=RS_123._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_119] (rows=8116 width=12) + Select Operator [SEL_122] (rows=8116 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_118] (rows=8116 width=98) + Filter Operator [FIL_121] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' - TableScan [TS_5] (rows=73049 width=98) + TableScan [TS_7] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_101] (rows=466374405 width=167) - Conds:RS_115._col2, _col3=RS_117._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + Merge Join Operator [MERGEJOIN_102] (rows=466374405 width=167) + Conds:RS_118._col2, _col3=RS_120._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] PartitionCols:_col2, _col3 - Select Operator [SEL_114] (rows=285115816 width=127) + Select Operator [SEL_117] (rows=285115816 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_113] (rows=285115816 width=127) + Filter Operator [FIL_116] (rows=285115816 width=127) predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=127) + TableScan [TS_2] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=51333 width=4) + Select Operator [SEL_111] (rows=51333 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Please refer to the previous Select Operator [SEL_109] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0, _col1 - Select Operator [SEL_116] (rows=28798881 width=117) + Select Operator [SEL_119] (rows=28798881 width=117) Output:["_col0","_col1","_col2"] - TableScan [TS_3] (rows=28798881 width=117) + TableScan [TS_5] (rows=28798881 width=117) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out index e1691b5f33..8171b4ddc1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query44.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[112][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing from(select * @@ -76,130 +76,130 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 10 <- Reducer 7 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 12 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 3 (SIMPLE_EDGE) +Reducer 9 <- Reducer 10 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=100 width=218) + Reducer 4 vectorized + File Output Operator [FS_150] + Limit [LIM_149] (rows=100 width=218) Number of rows:100 - Select Operator [SEL_147] (rows=6951 width=218) + Select Operator [SEL_148] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=6951 width=218) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_71] + Select Operator [SEL_70] (rows=6951 width=218) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_99] (rows=6951 width=218) - keys:_col1,top n:100 - Merge Join Operator [MERGEJOIN_116] (rows=6951 width=218) - Conds:RS_66._col2=RS_146._col0(Inner),Output:["_col1","_col5","_col7"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + Top N Key Operator [TNK_100] (rows=6951 width=218) + keys:_col3,top n:100 + Merge Join Operator [MERGEJOIN_117] (rows=6951 width=218) + Conds:RS_67._col4=RS_120._col0(Inner),Output:["_col1","_col3","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_144] (rows=462000 width=111) + Select Operator [SEL_118] (rows=462000 width=111) Output:["_col0","_col1"] - TableScan [TS_56] (rows=462000 width=111) + TableScan [TS_0] (rows=462000 width=111) default@item,i1,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_product_name"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_66] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=6951 width=115) - Conds:RS_63._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_116] (rows=6951 width=115) + Conds:RS_119._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_144] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Please refer to the previous Select Operator [SEL_118] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_65] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_114] (rows=6951 width=12) - Conds:RS_138._col1=RS_143._col1(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + Merge Join Operator [MERGEJOIN_115] (rows=6951 width=12) + Conds:RS_142._col1=RS_147._col1(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] PartitionCols:_col1 - Select Operator [SEL_137] (rows=6951 width=8) + Select Operator [SEL_146] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_136] (rows=6951 width=116) + Filter Operator [FIL_145] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_135] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_134] (rows=20854 width=116) + PTF Operator [PTF_144] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_143] (rows=20854 width=116) Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_51] PartitionCols:0 - Top N Key Operator [TNK_100] (rows=20854 width=228) + Top N Key Operator [TNK_102] (rows=20854 width=228) keys:_col1,top n:11 - Filter Operator [FIL_20] (rows=20854 width=228) + Filter Operator [FIL_22] (rows=20854 width=228) predicate:(_col1 > (0.9 * _col2)) - Merge Join Operator [MERGEJOIN_112] (rows=62562 width=228) + Merge Join Operator [MERGEJOIN_113] (rows=62562 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_133] - Select Operator [SEL_132] (rows=1 width=112) + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_137] + Select Operator [SEL_136] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_131] (rows=1 width=120) + Filter Operator [FIL_135] (rows=1 width=120) predicate:CAST( (_col1 / _col2) AS decimal(11,6)) is not null - Select Operator [SEL_130] (rows=1 width=120) + Select Operator [SEL_134] (rows=1 width=120) Output:["_col1","_col2"] - Group By Operator [GBY_129] (rows=1 width=124) + Group By Operator [GBY_133] (rows=1 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] PartitionCols:_col0 - Group By Operator [GBY_127] (rows=258 width=124) + Group By Operator [GBY_131] (rows=258 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:true - Select Operator [SEL_126] (rows=287946 width=114) + Select Operator [SEL_130] (rows=287946 width=114) Output:["_col1"] - Filter Operator [FIL_125] (rows=287946 width=114) + Filter Operator [FIL_129] (rows=287946 width=114) predicate:(ss_hdemo_sk is null and (ss_store_sk = 410)) - TableScan [TS_8] (rows=575995635 width=114) + TableScan [TS_10] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - Select Operator [SEL_123] (rows=62562 width=116) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=62562 width=116) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=62562 width=124) + Filter Operator [FIL_126] (rows=62562 width=124) predicate:CAST( (_col1 / _col2) AS decimal(11,6)) is not null - Group By Operator [GBY_121] (rows=62562 width=124) + Group By Operator [GBY_125] (rows=62562 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0 - Group By Operator [GBY_119] (rows=3199976 width=124) + Group By Operator [GBY_123] (rows=3199976 width=124) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_118] (rows=6399952 width=114) + Select Operator [SEL_122] (rows=6399952 width=114) Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_117] (rows=6399952 width=114) + Filter Operator [FIL_121] (rows=6399952 width=114) predicate:(ss_store_sk = 410) - TableScan [TS_0] (rows=575995635 width=114) + TableScan [TS_2] (rows=575995635 width=114) default@store_sales,ss1,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_142] PartitionCols:_col1 - Select Operator [SEL_142] (rows=6951 width=8) + Select Operator [SEL_141] (rows=6951 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_141] (rows=6951 width=116) + Filter Operator [FIL_140] (rows=6951 width=116) predicate:(rank_window_0 < 11) - PTF Operator [PTF_140] (rows=20854 width=116) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_139] (rows=20854 width=116) + PTF Operator [PTF_139] (rows=20854 width=116) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST","partition by:":"0"}] + Select Operator [SEL_138] (rows=20854 width=116) Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_23] PartitionCols:0 Top N Key Operator [TNK_101] (rows=20854 width=228) keys:_col1,top n:11 - Please refer to the previous Filter Operator [FIL_20] + Please refer to the previous Filter Operator [FIL_22] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out index 492e21bfca..e972c9be4b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query46.q.out @@ -99,120 +99,120 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_173] - Limit [LIM_172] (rows=100 width=594) + File Output Operator [FS_174] + Limit [LIM_173] (rows=100 width=594) Number of rows:100 - Select Operator [SEL_171] (rows=8380115 width=594) + Select Operator [SEL_172] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_44] - Select Operator [SEL_43] (rows=8380115 width=594) + SHUFFLE [RS_45] + Select Operator [SEL_44] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_78] (rows=8380115 width=594) + Top N Key Operator [TNK_79] (rows=8380115 width=594) keys:_col3, _col2, _col5, _col8, _col6,top n:100 - Filter Operator [FIL_42] (rows=8380115 width=594) + Filter Operator [FIL_43] (rows=8380115 width=594) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_144] (rows=8380115 width=594) - Conds:RS_39._col0=RS_170._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_145] (rows=8380115 width=594) + Conds:RS_40._col0=RS_171._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_40] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_139] (rows=80000000 width=277) - Conds:RS_147._col1=RS_149._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_140] (rows=80000000 width=277) + Conds:RS_148._col1=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_148] (rows=40000000 width=97) + Select Operator [SEL_149] (rows=40000000 width=97) Output:["_col0","_col1"] TableScan [TS_3] (rows=40000000 width=97) default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + SHUFFLE [RS_148] PartitionCols:_col1 - Select Operator [SEL_146] (rows=80000000 width=188) + Select Operator [SEL_147] (rows=80000000 width=188) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_145] (rows=80000000 width=188) + Filter Operator [FIL_146] (rows=80000000 width=188) predicate:c_current_addr_sk is not null TableScan [TS_0] (rows=80000000 width=188) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + SHUFFLE [RS_171] PartitionCols:_col1 - Select Operator [SEL_169] (rows=8380115 width=321) + Select Operator [SEL_170] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_168] (rows=8380115 width=321) + Group By Operator [GBY_169] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_33] + SHUFFLE [RS_34] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_32] (rows=8380115 width=321) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col12, _col3, _col5 - Merge Join Operator [MERGEJOIN_143] (rows=8380115 width=97) - Conds:RS_28._col3=RS_150._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col12"] + Group By Operator [GBY_33] (rows=8380115 width=321) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)","sum(_col9)"],keys:_col3, _col1, _col5, _col7 + Merge Join Operator [MERGEJOIN_144] (rows=8380115 width=97) + Conds:RS_151._col0=RS_30._col3(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col9"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + SHUFFLE [RS_151] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_148] + Please refer to the previous Select Operator [SEL_149] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_28] + SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_142] (rows=8380115 width=4) - Conds:RS_25._col2=RS_167._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_143] (rows=8380115 width=4) + Conds:RS_25._col2=RS_168._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_166] (rows=1855 width=4) + Select Operator [SEL_167] (rows=1855 width=4) Output:["_col0"] - Filter Operator [FIL_165] (rows=1855 width=12) + Filter Operator [FIL_166] (rows=1855 width=12) predicate:((hd_vehicle_count = 1) or (hd_dep_count = 2)) - TableScan [TS_14] (rows=7200 width=12) + TableScan [TS_16] (rows=7200 width=12) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_141] (rows=32526589 width=90) - Conds:RS_22._col4=RS_164._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_142] (rows=32526589 width=90) + Conds:RS_22._col4=RS_165._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_163] (rows=35 width=4) + Select Operator [SEL_164] (rows=35 width=4) Output:["_col0"] - Filter Operator [FIL_162] (rows=35 width=97) + Filter Operator [FIL_163] (rows=35 width=97) predicate:(s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') - TableScan [TS_11] (rows=1704 width=97) + TableScan [TS_13] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_140] (rows=196204013 width=218) - Conds:RS_161._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_141] (rows=196204013 width=218) + Conds:RS_162._col0=RS_154._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_152] (rows=783 width=4) + Select Operator [SEL_153] (rows=783 width=4) Output:["_col0"] - Filter Operator [FIL_151] (rows=783 width=12) + Filter Operator [FIL_152] (rows=783 width=12) predicate:((d_year) IN (1998, 1999, 2000) and (d_dow) IN (6, 0)) - TableScan [TS_8] (rows=73049 width=12) + TableScan [TS_10] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dow"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_160] (rows=457565061 width=237) + Select Operator [SEL_161] (rows=457565061 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_159] (rows=457565061 width=237) + Filter Operator [FIL_160] (rows=457565061 width=237) predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_5] (rows=575995635 width=237) + TableScan [TS_7] (rows=575995635 width=237) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) + BROADCAST [RS_159] + Group By Operator [GBY_158] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - Group By Operator [GBY_155] (rows=1 width=12) + SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_154] (rows=783 width=4) + Select Operator [SEL_155] (rows=783 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_152] + Please refer to the previous Select Operator [SEL_153] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out index b444bbc32a..ecc6ff633c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query47.q.out @@ -129,140 +129,140 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_322] - Limit [LIM_321] (rows=100 width=658) + File Output Operator [FS_323] + Limit [LIM_322] (rows=100 width=658) Number of rows:100 - Select Operator [SEL_320] (rows=301730 width=658) + Select Operator [SEL_321] (rows=301730 width=658) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_110] - Select Operator [SEL_109] (rows=301730 width=658) + SHUFFLE [RS_111] + Select Operator [SEL_110] (rows=301730 width=658) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_178] (rows=301730 width=546) - keys:(_col12 - _col13), _col11,top n:100 - Merge Join Operator [MERGEJOIN_279] (rows=301730 width=546) - Conds:RS_106._col6, _col7, _col8, _col9, _col14=RS_307._col0, _col1, _col2, _col3, _col5(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] + Top N Key Operator [TNK_179] (rows=301730 width=546) + keys:(_col18 - _col19), _col17,top n:100 + Merge Join Operator [MERGEJOIN_280] (rows=301730 width=546) + Conds:RS_308._col0, _col1, _col2, _col3, _col5=RS_108._col6, _col7, _col8, _col9, _col14(Inner),Output:["_col4","_col10","_col12","_col16","_col17","_col18","_col19"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + SHUFFLE [RS_308] PartitionCols:_col0, _col1, _col2, _col3, _col5 - Select Operator [SEL_305] (rows=1810380 width=485) + Select Operator [SEL_306] (rows=1810380 width=485) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_303] (rows=1810380 width=489) + Filter Operator [FIL_304] (rows=1810380 width=489) predicate:rank_window_0 is not null - PTF Operator [PTF_301] (rows=1810380 width=489) + PTF Operator [PTF_302] (rows=1810380 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col5, _col4, _col2, _col3"}] - Select Operator [SEL_300] (rows=1810380 width=489) + Select Operator [SEL_301] (rows=1810380 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_299] PartitionCols:_col5, _col4, _col2, _col3 - Group By Operator [GBY_297] (rows=1810380 width=489) + Group By Operator [GBY_298] (rows=1810380 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] + SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_92] (rows=162257387 width=489) + Group By Operator [GBY_22] (rows=162257387 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col9, _col11, _col12 - Merge Join Operator [MERGEJOIN_277] (rows=162257387 width=472) - Conds:RS_88._col1=RS_296._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] + Merge Join Operator [MERGEJOIN_272] (rows=162257387 width=472) + Conds:RS_18._col1=RS_297._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_296] + SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_295] (rows=462000 width=194) + Select Operator [SEL_296] (rows=462000 width=194) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_294] (rows=462000 width=194) + Filter Operator [FIL_295] (rows=462000 width=194) predicate:(i_category is not null and i_brand is not null) - TableScan [TS_79] (rows=462000 width=194) + TableScan [TS_9] (rows=462000 width=194) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] + SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=162257387 width=286) - Conds:RS_85._col2=RS_293._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] + Merge Join Operator [MERGEJOIN_271] (rows=162257387 width=286) + Conds:RS_15._col2=RS_294._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_292] (rows=1704 width=183) + Select Operator [SEL_293] (rows=1704 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_291] (rows=1704 width=183) + Filter Operator [FIL_292] (rows=1704 width=183) predicate:(s_store_name is not null and s_company_name is not null) - TableScan [TS_76] (rows=1704 width=183) + TableScan [TS_6] (rows=1704 width=183) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_name"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] + SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_275] (rows=162257387 width=111) - Conds:RS_290._col0=RS_282._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + Merge Join Operator [MERGEJOIN_270] (rows=162257387 width=111) + Conds:RS_291._col0=RS_283._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] + SHUFFLE [RS_283] PartitionCols:_col0 - Select Operator [SEL_281] (rows=564 width=12) + Select Operator [SEL_282] (rows=564 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=564 width=12) + Filter Operator [FIL_281] (rows=564 width=12) predicate:((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)))) - TableScan [TS_73] (rows=73049 width=12) + TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + SHUFFLE [RS_291] PartitionCols:_col0 - Select Operator [SEL_289] (rows=525329897 width=118) + Select Operator [SEL_290] (rows=525329897 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_288] (rows=525329897 width=118) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_70] (rows=575995635 width=118) + Filter Operator [FIL_289] (rows=525329897 width=118) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) + BROADCAST [RS_288] + Group By Operator [GBY_287] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) + SHUFFLE [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=564 width=4) + Select Operator [SEL_284] (rows=564 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_281] + Please refer to the previous Select Operator [SEL_282] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] + SHUFFLE [RS_108] PartitionCols:_col6, _col7, _col8, _col9, _col14 - Merge Join Operator [MERGEJOIN_278] (rows=301730 width=717) - Conds:RS_308._col0, _col1, _col2, _col3, _col5=RS_319._col0, _col1, _col2, _col3, _col8(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_279] (rows=301730 width=717) + Conds:RS_309._col0, _col1, _col2, _col3, _col5=RS_320._col0, _col1, _col2, _col3, _col8(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] + SHUFFLE [RS_309] PartitionCols:_col0, _col1, _col2, _col3, _col5 - Select Operator [SEL_306] (rows=1810380 width=485) + Select Operator [SEL_307] (rows=1810380 width=485) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_304] (rows=1810380 width=489) + Filter Operator [FIL_305] (rows=1810380 width=489) predicate:rank_window_0 is not null - PTF Operator [PTF_302] (rows=1810380 width=489) + PTF Operator [PTF_303] (rows=1810380 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col5, _col4, _col2, _col3"}] - Please refer to the previous Select Operator [SEL_300] + Please refer to the previous Select Operator [SEL_301] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_320] PartitionCols:_col0, _col1, _col2, _col3, _col8 - Select Operator [SEL_318] (rows=301730 width=605) + Select Operator [SEL_319] (rows=301730 width=605) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_317] (rows=301730 width=605) + Filter Operator [FIL_318] (rows=301730 width=605) predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (false) END - Select Operator [SEL_316] (rows=603460 width=601) + Select Operator [SEL_317] (rows=603460 width=601) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_315] (rows=603460 width=601) + Filter Operator [FIL_316] (rows=603460 width=601) predicate:((_col0 > 0) and rank_window_1 is not null and (_col1 = 2000)) - PTF Operator [PTF_314] (rows=1810380 width=601) + PTF Operator [PTF_315] (rows=1810380 width=601) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col6, _col5, _col3, _col4"}] - Select Operator [SEL_313] (rows=1810380 width=601) + Select Operator [SEL_314] (rows=1810380 width=601) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + SHUFFLE [RS_313] PartitionCols:_col5, _col4, _col2, _col3 - Select Operator [SEL_311] (rows=1810380 width=489) + Select Operator [SEL_312] (rows=1810380 width=489) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_310] (rows=1810380 width=489) + PTF Operator [PTF_311] (rows=1810380 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col5, _col4, _col2, _col3, _col0"}] - Select Operator [SEL_309] (rows=1810380 width=489) + Select Operator [SEL_310] (rows=1810380 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] + SHUFFLE [RS_300] PartitionCols:_col5, _col4, _col2, _col3, _col0 - Please refer to the previous Group By Operator [GBY_297] + Please refer to the previous Group By Operator [GBY_298] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out index 9d3505f8e7..865aa3c0b5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out @@ -143,86 +143,86 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_92] - Group By Operator [GBY_91] (rows=1 width=8) + Reducer 3 vectorized + File Output Operator [FS_94] + Group By Operator [GBY_93] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_24] - Group By Operator [GBY_23] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col3)"] - Select Operator [SEL_22] (rows=170127 width=24) - Output:["_col3"] - Filter Operator [FIL_21] (rows=170127 width=24) - predicate:((_col10 and _col4) or (_col11 and _col5) or (_col12 and _col6)) - Merge Join Operator [MERGEJOIN_73] (rows=226838 width=24) - Conds:RS_18._col2=RS_90._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col10","_col11","_col12"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_90] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_26] + Group By Operator [GBY_25] (rows=1 width=8) + Output:["_col0"],aggregations:["sum(_col8)"] + Select Operator [SEL_24] (rows=170127 width=24) + Output:["_col8"] + Filter Operator [FIL_23] (rows=170127 width=24) + predicate:((_col1 and _col9) or (_col2 and _col10) or (_col3 and _col11)) + Merge Join Operator [MERGEJOIN_75] (rows=226838 width=24) + Conds:RS_78._col0=RS_21._col3(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] PartitionCols:_col0 - Select Operator [SEL_89] (rows=3529412 width=16) + Select Operator [SEL_77] (rows=3529412 width=16) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_88] (rows=3529412 width=187) + Filter Operator [FIL_76] (rows=3529412 width=187) predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) - TableScan [TS_9] (rows=40000000 width=187) + TableScan [TS_0] (rows=40000000 width=187) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_72] (rows=2570826 width=12) - Conds:RS_15._col1=RS_87._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_87] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_74] (rows=2570826 width=12) + Conds:RS_81._col0=RS_17._col1(Inner),Output:["_col3","_col4","_col5","_col6","_col7"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] PartitionCols:_col0 - Select Operator [SEL_86] (rows=29552 width=4) + Select Operator [SEL_80] (rows=29552 width=4) Output:["_col0"] - Filter Operator [FIL_85] (rows=29552 width=183) + Filter Operator [FIL_79] (rows=29552 width=183) predicate:((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree')) - TableScan [TS_6] (rows=1861800 width=183) + TableScan [TS_3] (rows=1861800 width=183) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_71] (rows=57024544 width=22) - Conds:RS_84._col0=RS_76._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_76] + Merge Join Operator [MERGEJOIN_73] (rows=57024544 width=22) + Conds:RS_92._col0=RS_84._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_84] PartitionCols:_col0 - Select Operator [SEL_75] (rows=652 width=4) + Select Operator [SEL_83] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_74] (rows=652 width=8) + Filter Operator [FIL_82] (rows=652 width=8) predicate:(d_year = 1998) - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_9] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_84] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_83] (rows=159705894 width=27) + Select Operator [SEL_91] (rows=159705894 width=27) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_82] (rows=159705894 width=233) + Filter Operator [FIL_90] (rows=159705894 width=233) predicate:((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=233) + TableScan [TS_6] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_81] - Group By Operator [GBY_80] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_79] - Group By Operator [GBY_78] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_87] + Group By Operator [GBY_86] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_77] (rows=652 width=4) + Select Operator [SEL_85] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_75] + Please refer to the previous Select Operator [SEL_83] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out index 32b932da4b..2dc66957c6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out @@ -269,287 +269,287 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 27 <- Reducer 19 (BROADCAST_EDGE) -Map 29 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Union 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 15 <- Map 28 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Map 28 <- Reducer 20 (BROADCAST_EDGE) +Map 30 <- Reducer 26 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 16 <- Map 27 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 20 <- Map 12 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) -Reducer 21 <- Map 30 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 20 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 13 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Union 9 (CONTAINS) -Reducer 25 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 26 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 11 vectorized - File Output Operator [FS_315] - Limit [LIM_314] (rows=100 width=215) + Reducer 10 vectorized + File Output Operator [FS_318] + Limit [LIM_317] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_313] (rows=3418 width=215) + Select Operator [SEL_316] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - Select Operator [SEL_311] (rows=3418 width=215) + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_315] + Select Operator [SEL_314] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_310] (rows=3418 width=215) + Group By Operator [GBY_313] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 9 [SIMPLE_EDGE] - <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_356] + <-Union 8 [SIMPLE_EDGE] + <-Reducer 25 [CONTAINS] vectorized + Reduce Output Operator [RS_359] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_355] (rows=3418 width=215) + Group By Operator [GBY_358] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_354] (rows=3418 width=214) + Top N Key Operator [TNK_357] (rows=3418 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_353] (rows=1142 width=213) + Select Operator [SEL_356] (rows=1142 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_352] (rows=1142 width=248) + Filter Operator [FIL_355] (rows=1142 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_351] (rows=1714 width=248) + PTF Operator [PTF_354] (rows=1714 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_350] (rows=1714 width=248) + Select Operator [SEL_353] (rows=1714 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] + <-Reducer 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_352] PartitionCols:0 - Select Operator [SEL_348] (rows=1714 width=244) + Select Operator [SEL_351] (rows=1714 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_347] (rows=1714 width=244) + PTF Operator [PTF_350] (rows=1714 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_346] (rows=1714 width=244) + Select Operator [SEL_349] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] + <-Reducer 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_348] PartitionCols:0 - Group By Operator [GBY_344] (rows=1714 width=244) + Group By Operator [GBY_347] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_89] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_92] PartitionCols:_col0 - Group By Operator [GBY_88] (rows=1714 width=244) + Group By Operator [GBY_91] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 - Select Operator [SEL_86] (rows=1673571 width=73) + Select Operator [SEL_89] (rows=1673571 width=73) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_242] (rows=1673571 width=73) - Conds:RS_83._col1, _col2=RS_343._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] + Merge Join Operator [MERGEJOIN_245] (rows=1673571 width=73) + Conds:RS_346._col0, _col1=RS_87._col1, _col2(Inner),Output:["_col2","_col3","_col5","_col7","_col8"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_346] PartitionCols:_col0, _col1 - Select Operator [SEL_342] (rows=19197050 width=119) + Select Operator [SEL_345] (rows=19197050 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_341] (rows=19197050 width=119) + Filter Operator [FIL_344] (rows=19197050 width=119) predicate:(sr_return_amt > 10000) - TableScan [TS_77] (rows=57591150 width=119) + TableScan [TS_73] (rows=57591150 width=119) default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_83] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_87] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_241] (rows=1673571 width=8) - Conds:RS_340._col0=RS_277._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_277] + Merge Join Operator [MERGEJOIN_244] (rows=1673571 width=8) + Conds:RS_343._col0=RS_283._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_283] PartitionCols:_col0 - Select Operator [SEL_272] (rows=50 width=4) + Select Operator [SEL_278] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_271] (rows=50 width=12) + Filter Operator [FIL_277] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 12)) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] PartitionCols:_col0 - Select Operator [SEL_339] (rows=61119617 width=229) + Select Operator [SEL_342] (rows=61119617 width=229) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_338] (rows=61119617 width=229) - predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_71] (rows=575995635 width=229) + Filter Operator [FIL_341] (rows=61119617 width=229) + predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_76] (rows=575995635 width=229) default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_337] - Group By Operator [GBY_336] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_340] + Group By Operator [GBY_339] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_284] - Group By Operator [GBY_281] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_290] + Group By Operator [GBY_287] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_278] (rows=50 width=4) + Select Operator [SEL_284] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_272] - <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_309] + Please refer to the previous Select Operator [SEL_278] + <-Reducer 7 [CONTAINS] vectorized + Reduce Output Operator [RS_312] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_308] (rows=3418 width=215) + Group By Operator [GBY_311] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_307] (rows=3418 width=214) + Top N Key Operator [TNK_310] (rows=3418 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_306] (rows=2276 width=215) + Select Operator [SEL_309] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_305] (rows=2276 width=215) + Group By Operator [GBY_308] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_335] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 19 [CONTAINS] vectorized + Reduce Output Operator [RS_338] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_334] (rows=2276 width=215) + Group By Operator [GBY_337] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_333] (rows=1134 width=215) + Select Operator [SEL_336] (rows=1134 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_332] (rows=1134 width=248) + Filter Operator [FIL_335] (rows=1134 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_331] (rows=1701 width=248) + PTF Operator [PTF_334] (rows=1701 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_330] (rows=1701 width=248) + Select Operator [SEL_333] (rows=1701 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] PartitionCols:0 - Select Operator [SEL_328] (rows=1701 width=244) + Select Operator [SEL_331] (rows=1701 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_327] (rows=1701 width=244) + PTF Operator [PTF_330] (rows=1701 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_326] (rows=1701 width=244) + Select Operator [SEL_329] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] PartitionCols:0 - Group By Operator [GBY_324] (rows=1701 width=244) + Group By Operator [GBY_327] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_50] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_52] PartitionCols:_col0 - Group By Operator [GBY_49] (rows=1701 width=244) + Group By Operator [GBY_51] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 - Select Operator [SEL_47] (rows=865646 width=188) + Select Operator [SEL_49] (rows=865646 width=188) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_240] (rows=865646 width=188) - Conds:RS_44._col1, _col2=RS_323._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + Merge Join Operator [MERGEJOIN_243] (rows=865646 width=188) + Conds:RS_326._col0, _col1=RS_47._col1, _col2(Inner),Output:["_col2","_col3","_col5","_col7","_col8"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_326] PartitionCols:_col0, _col1 - Select Operator [SEL_322] (rows=9599627 width=121) + Select Operator [SEL_325] (rows=9599627 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_321] (rows=9599627 width=121) + Filter Operator [FIL_324] (rows=9599627 width=121) predicate:(cr_return_amount > 10000) - TableScan [TS_38] (rows=28798881 width=121) + TableScan [TS_33] (rows=28798881 width=121) default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_44] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_239] (rows=865646 width=102) - Conds:RS_320._col0=RS_275._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_275] + Merge Join Operator [MERGEJOIN_242] (rows=865646 width=102) + Conds:RS_323._col0=RS_281._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_281] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_272] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Please refer to the previous Select Operator [SEL_278] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0 - Select Operator [SEL_319] (rows=31838858 width=239) + Select Operator [SEL_322] (rows=31838858 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_318] (rows=31838858 width=239) - predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_32] (rows=287989836 width=239) + Filter Operator [FIL_321] (rows=31838858 width=239) + predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_36] (rows=287989836 width=239) default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_317] - Group By Operator [GBY_316] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_320] + Group By Operator [GBY_319] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_283] - Group By Operator [GBY_280] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_289] + Group By Operator [GBY_286] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_276] (rows=50 width=4) + Select Operator [SEL_282] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_272] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_304] + Please refer to the previous Select Operator [SEL_278] + <-Reducer 5 [CONTAINS] vectorized + Reduce Output Operator [RS_307] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_303] (rows=2276 width=215) + Group By Operator [GBY_306] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_302] (rows=1142 width=211) + Select Operator [SEL_305] (rows=1142 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_301] (rows=1142 width=248) + Filter Operator [FIL_304] (rows=1142 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_300] (rows=1714 width=248) + PTF Operator [PTF_303] (rows=1714 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_299] (rows=1714 width=248) + Select Operator [SEL_302] (rows=1714 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] PartitionCols:0 - Select Operator [SEL_297] (rows=1714 width=244) + Select Operator [SEL_300] (rows=1714 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_296] (rows=1714 width=244) + PTF Operator [PTF_299] (rows=1714 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_295] (rows=1714 width=244) + Select Operator [SEL_298] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] PartitionCols:0 - Group By Operator [GBY_293] (rows=1714 width=244) + Group By Operator [GBY_296] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=1714 width=244) + Group By Operator [GBY_18] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 - Select Operator [SEL_15] (rows=438010 width=177) + Select Operator [SEL_16] (rows=438010 width=177) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_238] (rows=438010 width=177) - Conds:RS_12._col1, _col2=RS_292._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] + Merge Join Operator [MERGEJOIN_241] (rows=438010 width=177) + Conds:RS_276._col0, _col1=RS_14._col1, _col2(Inner),Output:["_col2","_col3","_col5","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_276] PartitionCols:_col0, _col1 - Select Operator [SEL_291] (rows=4799489 width=118) + Select Operator [SEL_275] (rows=4799489 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_290] (rows=4799489 width=118) + Filter Operator [FIL_274] (rows=4799489 width=118) predicate:(wr_return_amt > 10000) - TableScan [TS_6] (rows=14398467 width=118) + TableScan [TS_0] (rows=14398467 width=118) default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_237] (rows=438010 width=122) - Conds:RS_289._col0=RS_273._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_273] + Merge Join Operator [MERGEJOIN_240] (rows=438010 width=122) + Conds:RS_295._col0=RS_279._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_279] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_272] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + Please refer to the previous Select Operator [SEL_278] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0 - Select Operator [SEL_288] (rows=15996318 width=239) + Select Operator [SEL_294] (rows=15996318 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_287] (rows=15996318 width=239) + Filter Operator [FIL_293] (rows=15996318 width=239) predicate:((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=239) + TableScan [TS_3] (rows=144002668 width=239) default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_286] - Group By Operator [GBY_285] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_292] + Group By Operator [GBY_291] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_282] - Group By Operator [GBY_279] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_288] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_274] (rows=50 width=4) + Select Operator [SEL_280] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_272] + Please refer to the previous Select Operator [SEL_278] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out index a5870f7727..0aaf6c30d1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out @@ -278,13 +278,13 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 11 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 21 <- Reducer 15 (BROADCAST_EDGE), Union 22 (CONTAINS) -Map 23 <- Union 22 (CONTAINS) +Map 22 <- Reducer 15 (BROADCAST_EDGE), Union 23 (CONTAINS) +Map 24 <- Union 23 (CONTAINS) Map 25 <- Reducer 19 (BROADCAST_EDGE), Union 26 (CONTAINS) Map 9 <- Union 2 (CONTAINS) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE) -Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE) +Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 10 (SIMPLE_EDGE), Union 26 (SIMPLE_EDGE) @@ -303,229 +303,229 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_305] - Limit [LIM_304] (rows=100 width=619) + File Output Operator [FS_306] + Limit [LIM_305] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_303] (rows=59581 width=619) + Select Operator [SEL_304] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] - Select Operator [SEL_301] (rows=59581 width=619) + SHUFFLE [RS_303] + Select Operator [SEL_302] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_300] (rows=59581 width=627) + Group By Operator [GBY_301] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_315] + Reduce Output Operator [RS_316] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_314] (rows=59581 width=627) + Group By Operator [GBY_315] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_313] (rows=39721 width=618) + Top N Key Operator [TNK_314] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_312] (rows=38846 width=619) + Select Operator [SEL_313] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_311] (rows=38846 width=548) + Group By Operator [GBY_312] (rows=38846 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_46] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=2835758 width=548) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_224] (rows=34813117 width=535) - Conds:RS_40._col0=RS_310._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + Group By Operator [GBY_45] (rows=2835758 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col4)","sum(_col6)","sum(_col5)","sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_225] (rows=34813117 width=535) + Conds:RS_311._col0=RS_42._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] PartitionCols:_col0 - Select Operator [SEL_309] (rows=46000 width=104) + Select Operator [SEL_310] (rows=46000 width=104) Output:["_col0","_col1"] - TableScan [TS_35] (rows=46000 width=104) + TableScan [TS_24] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_223] (rows=34813117 width=438) - Conds:Union 22._col1=RS_278._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_224] (rows=34813117 width=438) + Conds:Union 23._col1=RS_279._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_278] + SHUFFLE [RS_279] PartitionCols:_col0 - Select Operator [SEL_275] (rows=8116 width=4) + Select Operator [SEL_276] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_274] (rows=8116 width=98) + Filter Operator [FIL_275] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' TableScan [TS_8] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] vectorized - Reduce Output Operator [RS_327] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] vectorized + Reduce Output Operator [RS_328] PartitionCols:_col1 - Select Operator [SEL_326] (rows=285117694 width=455) + Select Operator [SEL_327] (rows=285117694 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_325] (rows=285117694 width=231) + Filter Operator [FIL_326] (rows=285117694 width=231) predicate:(cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_255] (rows=287989836 width=231) + TableScan [TS_256] (rows=287989836 width=231) Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_324] - Group By Operator [GBY_323] (rows=1 width=12) + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] - Group By Operator [GBY_283] (rows=1 width=12) + SHUFFLE [RS_287] + Group By Operator [GBY_284] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_279] (rows=8116 width=4) + Select Operator [SEL_280] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_275] - <-Map 23 [CONTAINS] vectorized - Reduce Output Operator [RS_330] + Please refer to the previous Select Operator [SEL_276] + <-Map 24 [CONTAINS] vectorized + Reduce Output Operator [RS_331] PartitionCols:_col1 - Select Operator [SEL_329] (rows=28221805 width=451) + Select Operator [SEL_330] (rows=28221805 width=451) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_328] (rows=28221805 width=227) + Filter Operator [FIL_329] (rows=28221805 width=227) predicate:(cr_catalog_page_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_260] (rows=28798881 width=227) + TableScan [TS_261] (rows=28798881 width=227) Output:["cr_returned_date_sk","cr_catalog_page_sk","cr_return_amount","cr_net_loss"] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_322] + Reduce Output Operator [RS_323] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_321] (rows=59581 width=627) + Group By Operator [GBY_322] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_320] (rows=39721 width=618) + Top N Key Operator [TNK_321] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_319] (rows=53 width=615) + Select Operator [SEL_320] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_318] (rows=53 width=548) + Group By Operator [GBY_319] (rows=53 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_77] + SHUFFLE [RS_78] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=3498 width=548) + Group By Operator [GBY_77] (rows=3498 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_226] (rows=30966059 width=543) - Conds:RS_72._col0=RS_317._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_227] (rows=30966059 width=543) + Conds:RS_73._col0=RS_318._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + SHUFFLE [RS_318] PartitionCols:_col0 - Select Operator [SEL_316] (rows=84 width=104) + Select Operator [SEL_317] (rows=84 width=104) Output:["_col0","_col1"] - TableScan [TS_67] (rows=84 width=104) + TableScan [TS_68] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_72] + SHUFFLE [RS_73] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_225] (rows=30966059 width=447) - Conds:Union 26._col1=RS_280._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_226] (rows=30966059 width=447) + Conds:Union 26._col1=RS_281._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_280] + SHUFFLE [RS_281] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_275] + Please refer to the previous Select Operator [SEL_276] <-Union 26 [SIMPLE_EDGE] <-Map 25 [CONTAINS] vectorized - Reduce Output Operator [RS_335] + Reduce Output Operator [RS_336] PartitionCols:_col1 - Select Operator [SEL_334] (rows=143930874 width=455) + Select Operator [SEL_335] (rows=143930874 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_333] (rows=143930874 width=231) - predicate:(ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_265] (rows=144002668 width=231) + Filter Operator [FIL_334] (rows=143930874 width=231) + predicate:(ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_71_date_dim_d_date_sk_min) AND DynamicValue(RS_71_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_71_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_266] (rows=144002668 width=231) Output:["ws_sold_date_sk","ws_web_site_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_332] - Group By Operator [GBY_331] (rows=1 width=12) + BROADCAST [RS_333] + Group By Operator [GBY_332] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_287] - Group By Operator [GBY_284] (rows=1 width=12) + SHUFFLE [RS_288] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_281] (rows=8116 width=4) + Select Operator [SEL_282] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_275] + Please refer to the previous Select Operator [SEL_276] <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_273] + Reduce Output Operator [RS_274] PartitionCols:_col1 - Select Operator [SEL_271] (rows=134782734 width=454) + Select Operator [SEL_272] (rows=134782734 width=454) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_270] (rows=134782734 width=230) - Conds:RS_338._col0, _col2=RS_341._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_271] (rows=134782734 width=230) + Conds:RS_339._col0, _col2=RS_342._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] + SHUFFLE [RS_339] PartitionCols:_col0, _col2 - Select Operator [SEL_337] (rows=143966669 width=11) + Select Operator [SEL_338] (rows=143966669 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_336] (rows=143966669 width=11) + Filter Operator [FIL_337] (rows=143966669 width=11) predicate:ws_web_site_sk is not null - TableScan [TS_52] (rows=144002668 width=11) + TableScan [TS_53] (rows=144002668 width=11) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] + SHUFFLE [RS_342] PartitionCols:_col1, _col2 - Select Operator [SEL_340] (rows=13749816 width=225) + Select Operator [SEL_341] (rows=13749816 width=225) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_339] (rows=13749816 width=225) + Filter Operator [FIL_340] (rows=13749816 width=225) predicate:wr_returned_date_sk is not null - TableScan [TS_55] (rows=14398467 width=225) + TableScan [TS_56] (rows=14398467 width=225) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] vectorized - Reduce Output Operator [RS_299] + Reduce Output Operator [RS_300] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_298] (rows=59581 width=627) + Group By Operator [GBY_299] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_297] (rows=39721 width=618) + Top N Key Operator [TNK_298] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_296] (rows=822 width=617) + Select Operator [SEL_297] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_295] (rows=822 width=548) + Group By Operator [GBY_296] (rows=822 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 Group By Operator [GBY_20] (rows=78090 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_222] (rows=64325014 width=376) - Conds:RS_16._col0=RS_294._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_223] (rows=64325014 width=376) + Conds:RS_16._col0=RS_295._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] + SHUFFLE [RS_295] PartitionCols:_col0 - Select Operator [SEL_293] (rows=1704 width=104) + Select Operator [SEL_294] (rows=1704 width=104) Output:["_col0","_col1"] TableScan [TS_11] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_221] (rows=64325014 width=277) - Conds:Union 2._col1=RS_276._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_222] (rows=64325014 width=277) + Conds:Union 2._col1=RS_277._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_276] + SHUFFLE [RS_277] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_275] + Please refer to the previous Select Operator [SEL_276] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_292] + Reduce Output Operator [RS_293] PartitionCols:_col1 - Select Operator [SEL_291] (rows=525329897 width=445) + Select Operator [SEL_292] (rows=525329897 width=445) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_290] (rows=525329897 width=221) + Filter Operator [FIL_291] (rows=525329897 width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_14_date_dim_d_date_sk_min) AND DynamicValue(RS_14_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_14_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_227] (rows=575995635 width=221) + TableScan [TS_228] (rows=575995635 width=221) Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_289] - Group By Operator [GBY_288] (rows=1 width=12) + BROADCAST [RS_290] + Group By Operator [GBY_289] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_282] (rows=1 width=12) + SHUFFLE [RS_286] + Group By Operator [GBY_283] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_277] (rows=8116 width=4) + Select Operator [SEL_278] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_275] + Please refer to the previous Select Operator [SEL_276] <-Map 9 [CONTAINS] vectorized - Reduce Output Operator [RS_308] + Reduce Output Operator [RS_309] PartitionCols:_col1 - Select Operator [SEL_307] (rows=53634860 width=447) + Select Operator [SEL_308] (rows=53634860 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_306] (rows=53634860 width=223) + Filter Operator [FIL_307] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_238] (rows=57591150 width=223) + TableScan [TS_239] (rows=57591150 width=223) Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out index 7d0ffc7bd2..8f06489fe3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out @@ -127,92 +127,92 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 7 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_119] - Limit [LIM_118] (rows=100 width=858) + Reducer 5 vectorized + File Output Operator [FS_120] + Limit [LIM_119] (rows=100 width=858) Number of rows:100 - Select Operator [SEL_117] (rows=11945216 width=857) + Select Operator [SEL_118] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] - Group By Operator [GBY_115] (rows=11945216 width=857) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] + Group By Operator [GBY_116] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_22] (rows=11945216 width=857) + Group By Operator [GBY_23] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Select Operator [SEL_20] (rows=11945216 width=821) + Select Operator [SEL_21] (rows=11945216 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Top N Key Operator [TNK_46] (rows=11945216 width=821) + Top N Key Operator [TNK_47] (rows=11945216 width=821) keys:_col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20,top n:100 - Merge Join Operator [MERGEJOIN_101] (rows=11945216 width=821) - Conds:RS_17._col8=RS_114._col0(Inner),Output:["_col0","_col5","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] + Merge Join Operator [MERGEJOIN_102] (rows=11945216 width=821) + Conds:RS_18._col3=RS_115._col0(Inner),Output:["_col0","_col5","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + SHUFFLE [RS_115] PartitionCols:_col0 - Select Operator [SEL_113] (rows=1704 width=821) + Select Operator [SEL_114] (rows=1704 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - TableScan [TS_9] (rows=1704 width=821) + TableScan [TS_13] (rows=1704 width=821) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_100] (rows=11945216 width=3) - Conds:RS_14._col1, _col2, _col3=RS_112._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_14] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_101] (rows=11945216 width=3) + Conds:RS_113._col1, _col2, _col4=RS_16._col1, _col2, _col3(Inner),Output:["_col0","_col3","_col5"] + <-Reducer 7 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_16] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_99] (rows=1339446 width=8) - Conds:RS_104._col0=RS_107._col0(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_104] + Merge Join Operator [MERGEJOIN_100] (rows=1339446 width=8) + Conds:RS_105._col0=RS_108._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] PartitionCols:_col0 - Select Operator [SEL_103] (rows=53632139 width=15) + Select Operator [SEL_104] (rows=53632139 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_102] (rows=53632139 width=15) + Filter Operator [FIL_103] (rows=53632139 width=15) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_0] (rows=57591150 width=15) + TableScan [TS_3] (rows=57591150 width=15) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_106] (rows=50 width=4) + Select Operator [SEL_107] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_105] (rows=50 width=12) + Filter Operator [FIL_106] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 9)) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_111] (rows=501694138 width=19) + Select Operator [SEL_112] (rows=501694138 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_110] (rows=501694138 width=19) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number BETWEEN DynamicValue(RS_14_store_returns_sr_ticket_number_min) AND DynamicValue(RS_14_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_14_store_returns_sr_ticket_number_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=19) + Filter Operator [FIL_111] (rows=501694138 width=19) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number BETWEEN DynamicValue(RS_16_store_returns_sr_ticket_number_min) AND DynamicValue(RS_16_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_16_store_returns_sr_ticket_number_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=19) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_109] - Group By Operator [GBY_108] (rows=1 width=12) + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_86] - Group By Operator [GBY_85] (rows=1 width=12) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_62] + Group By Operator [GBY_61] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_84] (rows=1339446 width=8) + Select Operator [SEL_60] (rows=1339446 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_99] + Please refer to the previous Merge Join Operator [MERGEJOIN_100] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 5a2d93a5b0..8929007f81 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[285][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[284][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product -Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[281][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[287][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[289][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,8 +133,8 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 22 <- Reducer 30 (BROADCAST_EDGE), Union 23 (CONTAINS) -Map 28 <- Reducer 30 (BROADCAST_EDGE), Union 23 (CONTAINS) +Map 22 <- Reducer 28 (BROADCAST_EDGE), Union 23 (CONTAINS) +Map 26 <- Reducer 28 (BROADCAST_EDGE), Union 23 (CONTAINS) Reducer 10 <- Map 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 9 (SIMPLE_EDGE) @@ -143,16 +143,16 @@ Reducer 14 <- Map 9 (SIMPLE_EDGE) Reducer 15 <- Map 33 (CUSTOM_SIMPLE_EDGE), Reducer 14 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Map 9 (SIMPLE_EDGE) -Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 24 <- Map 29 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE) -Reducer 25 <- Map 31 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Map 32 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 24 <- Map 27 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE) +Reducer 25 <- Map 29 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 28 <- Map 27 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) @@ -163,269 +163,267 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_373] - Limit [LIM_372] (rows=1 width=16) + File Output Operator [FS_375] + Limit [LIM_374] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_371] (rows=1 width=16) + Select Operator [SEL_373] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_370] - Select Operator [SEL_369] (rows=1 width=16) + SHUFFLE [RS_372] + Select Operator [SEL_371] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_368] (rows=1 width=12) + Top N Key Operator [TNK_370] (rows=1 width=12) keys:_col0, _col1,top n:100 - Group By Operator [GBY_367] (rows=1 width=12) + Group By Operator [GBY_369] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_366] + SHUFFLE [RS_368] PartitionCols:_col0 - Group By Operator [GBY_365] (rows=1 width=12) + Group By Operator [GBY_367] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_364] (rows=1 width=116) + Select Operator [SEL_366] (rows=1 width=116) Output:["_col0"] - Top N Key Operator [TNK_363] (rows=1 width=116) + Top N Key Operator [TNK_365] (rows=1 width=116) keys:UDFToInteger((_col1 / 50)),top n:100 - Group By Operator [GBY_362] (rows=1 width=116) + Group By Operator [GBY_364] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_119] + SHUFFLE [RS_121] PartitionCols:_col0 - Group By Operator [GBY_118] (rows=228 width=116) + Group By Operator [GBY_120] (rows=228 width=116) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_289] (rows=525327388 width=110) - Conds:RS_114._col0=RS_115._col0(Inner),Output:["_col2","_col5"] + Merge Join Operator [MERGEJOIN_291] (rows=525327388 width=110) + Conds:RS_116._col0=RS_117._col0(Inner),Output:["_col2","_col5"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_115] + SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_104] (rows=5072841 width=12) + Select Operator [SEL_106] (rows=5072841 width=12) Output:["_col0"] - Filter Operator [FIL_103] (rows=5072841 width=12) + Filter Operator [FIL_105] (rows=5072841 width=12) predicate:(_col1 <= _col3) - Merge Join Operator [MERGEJOIN_287] (rows=15218525 width=12) + Merge Join Operator [MERGEJOIN_289] (rows=15218525 width=12) Conds:(Inner),Output:["_col0","_col1","_col3"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_100] - Filter Operator [FIL_99] (rows=608741 width=12) + PARTITION_ONLY_SHUFFLE [RS_102] + Filter Operator [FIL_101] (rows=608741 width=12) predicate:(_col2 <= _col1) - Merge Join Operator [MERGEJOIN_284] (rows=1826225 width=12) + Merge Join Operator [MERGEJOIN_286] (rows=1826225 width=12) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Map 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_359] - Select Operator [SEL_358] (rows=73049 width=8) + PARTITION_ONLY_SHUFFLE [RS_361] + Select Operator [SEL_360] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_357] (rows=73049 width=8) + Filter Operator [FIL_359] (rows=73049 width=8) predicate:d_month_seq is not null - TableScan [TS_77] (rows=73049 width=8) + TableScan [TS_79] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_356] - Group By Operator [GBY_355] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_358] + Group By Operator [GBY_357] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + SHUFFLE [RS_318] PartitionCols:_col0 - Group By Operator [GBY_312] (rows=25 width=4) + Group By Operator [GBY_314] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_308] (rows=50 width=12) + Select Operator [SEL_310] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_304] (rows=50 width=12) + Filter Operator [FIL_306] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_361] - Group By Operator [GBY_360] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_363] + Group By Operator [GBY_362] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + SHUFFLE [RS_319] PartitionCols:_col0 - Group By Operator [GBY_313] (rows=25 width=4) + Group By Operator [GBY_315] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_309] (rows=50 width=12) + Select Operator [SEL_311] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_305] (rows=50 width=12) + Filter Operator [FIL_307] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) Please refer to the previous TableScan [TS_3] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_114] + SHUFFLE [RS_116] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_288] (rows=525327388 width=114) - Conds:RS_111._col1=RS_112._col0(Inner),Output:["_col0","_col2","_col5"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_112] + Merge Join Operator [MERGEJOIN_290] (rows=525327388 width=114) + Conds:RS_113._col1=RS_114._col0(Inner),Output:["_col0","_col2","_col5"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_114] PartitionCols:_col0 - Select Operator [SEL_76] (rows=55046 width=4) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_286] (rows=55046 width=4) - Conds:RS_73._col0=RS_354._col1(Inner),Output:["_col5"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_280] (rows=39720279 width=4) - Conds:RS_336._col1, _col2=RS_339._col0, _col1(Inner),Output:["_col0"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] - PartitionCols:_col1, _col2 - Select Operator [SEL_335] (rows=40000000 width=188) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_334] (rows=40000000 width=188) - predicate:(ca_county is not null and ca_state is not null) - TableScan [TS_33] (rows=40000000 width=188) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + Merge Join Operator [MERGEJOIN_288] (rows=55046 width=4) + Conds:RS_350._col1=RS_76._col0(Inner),Output:["_col0"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_350] + PartitionCols:_col1 + Select Operator [SEL_349] (rows=55046 width=8) + Output:["_col0","_col1"] + Group By Operator [GBY_348] (rows=55046 width=8) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_62] PartitionCols:_col0, _col1 - Select Operator [SEL_338] (rows=1704 width=184) - Output:["_col0","_col1"] - Filter Operator [FIL_337] (rows=1704 width=184) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_36] (rows=1704 width=184) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] - PartitionCols:_col1 - Select Operator [SEL_353] (rows=55046 width=8) - Output:["_col0","_col1"] - Group By Operator [GBY_352] (rows=55046 width=8) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0, _col1 - Group By Operator [GBY_66] (rows=55046 width=8) - Output:["_col0","_col1"],keys:_col6, _col5 - Merge Join Operator [MERGEJOIN_283] (rows=110092 width=8) - Conds:RS_62._col1=RS_351._col0(Inner),Output:["_col5","_col6"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_351] - PartitionCols:_col0 - Select Operator [SEL_350] (rows=80000000 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_349] (rows=80000000 width=8) - predicate:c_current_addr_sk is not null - TableScan [TS_53] (rows=80000000 width=8) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_282] (rows=110092 width=0) - Conds:RS_59._col2=RS_348._col0(Inner),Output:["_col1"] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - PartitionCols:_col0 - Select Operator [SEL_347] (rows=453 width=4) - Output:["_col0"] - Filter Operator [FIL_346] (rows=453 width=186) - predicate:((i_class = 'consignment') and (i_category = 'Jewelry')) - TableScan [TS_50] (rows=462000 width=186) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_281] (rows=11665117 width=7) - Conds:Union 23._col0=RS_342._col0(Inner),Output:["_col1","_col2"] - <-Map 29 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_61] (rows=55046 width=8) + Output:["_col0","_col1"],keys:_col1, _col0 + Merge Join Operator [MERGEJOIN_284] (rows=110092 width=8) + Conds:RS_338._col0=RS_58._col1(Inner),Output:["_col0","_col1"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] + PartitionCols:_col0 + Select Operator [SEL_337] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_336] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_33] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_58] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_283] (rows=110092 width=0) + Conds:RS_53._col2=RS_347._col0(Inner),Output:["_col1"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_347] + PartitionCols:_col0 + Select Operator [SEL_346] (rows=453 width=4) + Output:["_col0"] + Filter Operator [FIL_345] (rows=453 width=186) + predicate:((i_class = 'consignment') and (i_category = 'Jewelry')) + TableScan [TS_47] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_282] (rows=11665117 width=7) + Conds:Union 23._col0=RS_341._col0(Inner),Output:["_col1","_col2"] + <-Map 27 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + PartitionCols:_col0 + Select Operator [SEL_340] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_339] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 3)) + TableScan [TS_44] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] vectorized + Reduce Output Operator [RS_381] PartitionCols:_col0 - Select Operator [SEL_341] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_340] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 3)) - TableScan [TS_47] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 23 [SIMPLE_EDGE] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_379] - PartitionCols:_col0 - Select Operator [SEL_378] (rows=285117831 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_377] (rows=285117831 width=11) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_290] (rows=287989836 width=11) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_375] - Group By Operator [GBY_374] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] - Group By Operator [GBY_344] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_343] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_341] - <-Map 28 [CONTAINS] vectorized - Reduce Output Operator [RS_382] - PartitionCols:_col0 - Select Operator [SEL_381] (rows=143930993 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_380] (rows=143930993 width=11) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_295] (rows=144002668 width=11) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_376] - Please refer to the previous Group By Operator [GBY_374] + Select Operator [SEL_380] (rows=285117831 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_379] (rows=285117831 width=11) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_292] (rows=287989836 width=11) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_377] + Group By Operator [GBY_376] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_343] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_342] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_340] + <-Map 26 [CONTAINS] vectorized + Reduce Output Operator [RS_384] + PartitionCols:_col0 + Select Operator [SEL_383] (rows=143930993 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_382] (rows=143930993 width=11) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_297] (rows=144002668 width=11) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_378] + Please refer to the previous Group By Operator [GBY_376] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_285] (rows=39720279 width=4) + Conds:RS_353._col1, _col2=RS_356._col0, _col1(Inner),Output:["_col0"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] + PartitionCols:_col1, _col2 + Select Operator [SEL_352] (rows=40000000 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_351] (rows=40000000 width=188) + predicate:(ca_county is not null and ca_state is not null) + TableScan [TS_65] (rows=40000000 width=188) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_356] + PartitionCols:_col0, _col1 + Select Operator [SEL_355] (rows=1704 width=184) + Output:["_col0","_col1"] + Filter Operator [FIL_354] (rows=1704 width=184) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_68] (rows=1704 width=184) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_111] + SHUFFLE [RS_113] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_285] (rows=525327388 width=114) + Merge Join Operator [MERGEJOIN_287] (rows=525327388 width=114) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - Select Operator [SEL_332] (rows=1 width=8) - Filter Operator [FIL_331] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_335] + Select Operator [SEL_334] (rows=1 width=8) + Filter Operator [FIL_333] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_330] (rows=1 width=8) + Group By Operator [GBY_332] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - Group By Operator [GBY_328] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_331] + Group By Operator [GBY_330] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_327] (rows=25 width=4) - Group By Operator [GBY_326] (rows=25 width=4) + Select Operator [SEL_329] (rows=25 width=4) + Group By Operator [GBY_328] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + SHUFFLE [RS_317] PartitionCols:_col0 - Group By Operator [GBY_311] (rows=25 width=4) + Group By Operator [GBY_313] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_307] (rows=50 width=12) + Select Operator [SEL_309] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_303] (rows=50 width=12) + Filter Operator [FIL_305] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3)) Please refer to the previous TableScan [TS_3] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_108] - Merge Join Operator [MERGEJOIN_279] (rows=525327388 width=114) + PARTITION_ONLY_SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_281] (rows=525327388 width=114) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_302] - Select Operator [SEL_301] (rows=525327388 width=114) + PARTITION_ONLY_SHUFFLE [RS_304] + Select Operator [SEL_303] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_300] (rows=525327388 width=114) + Filter Operator [FIL_302] (rows=525327388 width=114) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] - Select Operator [SEL_324] (rows=1 width=8) - Filter Operator [FIL_323] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_327] + Select Operator [SEL_326] (rows=1 width=8) + Filter Operator [FIL_325] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_322] (rows=1 width=8) + Group By Operator [GBY_324] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_321] - Group By Operator [GBY_320] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_323] + Group By Operator [GBY_322] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_319] (rows=25 width=4) - Group By Operator [GBY_318] (rows=25 width=4) + Select Operator [SEL_321] (rows=25 width=4) + Group By Operator [GBY_320] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] + SHUFFLE [RS_316] PartitionCols:_col0 - Group By Operator [GBY_310] (rows=25 width=4) + Group By Operator [GBY_312] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_306] (rows=50 width=12) + Select Operator [SEL_308] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_303] + Please refer to the previous Filter Operator [FIL_305] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out index aac99e9303..26f0795d3f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query56.q.out @@ -149,27 +149,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 18 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Map 27 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 20 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -177,224 +177,218 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_355] - Limit [LIM_354] (rows=100 width=212) + File Output Operator [FS_358] + Limit [LIM_357] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_353] (rows=355 width=212) + Select Operator [SEL_356] (rows=355 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] - Top N Key Operator [TNK_351] (rows=355 width=212) + SHUFFLE [RS_355] + Top N Key Operator [TNK_354] (rows=355 width=212) keys:_col1,top n:100 - Group By Operator [GBY_350] (rows=355 width=212) + Group By Operator [GBY_353] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_371] + Reduce Output Operator [RS_374] PartitionCols:_col0 - Group By Operator [GBY_370] (rows=355 width=212) + Group By Operator [GBY_373] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_369] (rows=355 width=212) + Group By Operator [GBY_372] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_106] + SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_105] (rows=355 width=212) + Group By Operator [GBY_108] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=172427 width=188) - Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_305] (rows=172427 width=188) + Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_101] + SHUFFLE [RS_104] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_291] (rows=15609 width=104) - Conds:RS_316._col1=RS_322._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_294] (rows=15609 width=104) + Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + SHUFFLE [RS_319] PartitionCols:_col1 - Select Operator [SEL_315] (rows=462000 width=104) + Select Operator [SEL_318] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + SHUFFLE [RS_325] PartitionCols:_col0 - Group By Operator [GBY_321] (rows=10500 width=100) + Group By Operator [GBY_324] (rows=10500 width=100) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_323] PartitionCols:_col0 - Group By Operator [GBY_319] (rows=10500 width=100) + Group By Operator [GBY_322] (rows=10500 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_318] (rows=21000 width=189) + Select Operator [SEL_321] (rows=21000 width=189) Output:["i_item_id"] - Filter Operator [FIL_317] (rows=21000 width=189) + Filter Operator [FIL_320] (rows=21000 width=189) predicate:(i_color) IN ('orchid', 'chiffon', 'lace') TableScan [TS_2] (rows=462000 width=189) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_105] PartitionCols:_col2 - Select Operator [SEL_97] (rows=788222 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_299] (rows=788222 width=110) - Conds:RS_94._col2=RS_346._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] - PartitionCols:_col0 - Select Operator [SEL_343] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_342] (rows=8000000 width=112) - predicate:(ca_gmt_offset = -8) - TableScan [TS_15] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_298] (rows=3941109 width=118) - Conds:RS_368._col0=RS_329._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - PartitionCols:_col0 - Select Operator [SEL_324] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_323] (rows=50 width=12) - predicate:((d_year = 2000) and (d_moy = 1)) - TableScan [TS_12] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_368] - PartitionCols:_col0 - Select Operator [SEL_367] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_366] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_82] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_365] - Group By Operator [GBY_364] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Group By Operator [GBY_333] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_330] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_324] + Merge Join Operator [MERGEJOIN_302] (rows=788222 width=110) + Conds:RS_330._col0=RS_98._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] + PartitionCols:_col0 + Select Operator [SEL_327] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_326] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -8) + TableScan [TS_9] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_301] (rows=3941109 width=118) + Conds:RS_371._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + PartitionCols:_col0 + Select Operator [SEL_332] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_331] (rows=50 width=12) + predicate:((d_year = 2000) and (d_moy = 1)) + TableScan [TS_15] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_371] + PartitionCols:_col0 + Select Operator [SEL_370] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_369] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_94_date_dim_d_date_sk_min) AND DynamicValue(RS_94_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_94_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_87] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_341] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_338] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_332] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_349] + Reduce Output Operator [RS_352] PartitionCols:_col0 - Group By Operator [GBY_348] (rows=355 width=212) + Group By Operator [GBY_351] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_347] (rows=355 width=212) + Group By Operator [GBY_350] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_33] + SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_32] (rows=355 width=212) + Group By Operator [GBY_33] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_300] (rows=629332 width=100) - Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_303] (rows=629332 width=100) + Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_291] - <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_29] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col2 - Select Operator [SEL_24] (rows=2876890 width=4) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_293] (rows=2876890 width=4) - Conds:RS_21._col2=RS_344._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_343] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_292] (rows=14384447 width=4) - Conds:RS_341._col0=RS_325._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_324] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] - PartitionCols:_col0 - Select Operator [SEL_340] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_339] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_338] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_331] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_326] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_324] + Merge Join Operator [MERGEJOIN_296] (rows=2876890 width=4) + Conds:RS_328._col0=RS_23._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_327] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_295] (rows=14384447 width=4) + Conds:RS_349._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_333] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_332] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_349] + PartitionCols:_col0 + Select Operator [SEL_348] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_347] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_12] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_346] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_339] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_334] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_332] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_363] + Reduce Output Operator [RS_366] PartitionCols:_col0 - Group By Operator [GBY_362] (rows=355 width=212) + Group By Operator [GBY_365] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_361] (rows=355 width=212) + Group By Operator [GBY_364] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_69] + SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=355 width=212) + Group By Operator [GBY_70] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_301] (rows=339151 width=100) - Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_304] (rows=339151 width=100) + Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_64] + SHUFFLE [RS_66] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_291] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_65] + Please refer to the previous Merge Join Operator [MERGEJOIN_294] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col3 - Select Operator [SEL_60] (rows=1550375 width=13) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_296] (rows=1550375 width=13) - Conds:RS_57._col1=RS_345._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_343] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_295] (rows=7751872 width=98) - Conds:RS_360._col0=RS_327._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_327] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_324] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_360] - PartitionCols:_col0 - Select Operator [SEL_359] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_358] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_45] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_357] - Group By Operator [GBY_356] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - Group By Operator [GBY_332] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_328] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_324] + Merge Join Operator [MERGEJOIN_299] (rows=1550375 width=13) + Conds:RS_329._col0=RS_60._col1(Inner),Output:["_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_327] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_298] (rows=7751872 width=98) + Conds:RS_363._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_335] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_332] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_363] + PartitionCols:_col0 + Select Operator [SEL_362] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_361] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_56_date_dim_d_date_sk_min) AND DynamicValue(RS_56_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_56_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_49] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_360] + Group By Operator [GBY_359] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] + Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_332] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out index 49a1ba535d..fd9e31341b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query57.q.out @@ -123,140 +123,140 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_322] - Limit [LIM_321] (rows=100 width=758) + File Output Operator [FS_323] + Limit [LIM_322] (rows=100 width=758) Number of rows:100 - Select Operator [SEL_320] (rows=397735 width=758) + Select Operator [SEL_321] (rows=397735 width=758) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_110] - Select Operator [SEL_109] (rows=397735 width=758) + SHUFFLE [RS_111] + Select Operator [SEL_110] (rows=397735 width=758) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Top N Key Operator [TNK_178] (rows=397735 width=646) - keys:(_col10 - _col11), _col8,top n:100 - Merge Join Operator [MERGEJOIN_279] (rows=397735 width=646) - Conds:RS_106._col5, _col6, _col12, _col7=RS_307._col0, _col1, _col4, _col2(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] + Top N Key Operator [TNK_179] (rows=397735 width=646) + keys:(_col15 - _col16), _col13,top n:100 + Merge Join Operator [MERGEJOIN_280] (rows=397735 width=646) + Conds:RS_308._col0, _col1, _col4, _col2=RS_108._col5, _col6, _col12, _col7(Inner),Output:["_col3","_col8","_col10","_col11","_col13","_col14","_col15","_col16"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + SHUFFLE [RS_308] PartitionCols:_col0, _col1, _col4, _col2 - Select Operator [SEL_305] (rows=2386410 width=404) + Select Operator [SEL_306] (rows=2386410 width=404) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_303] (rows=2386410 width=408) + Filter Operator [FIL_304] (rows=2386410 width=408) predicate:rank_window_0 is not null - PTF Operator [PTF_301] (rows=2386410 width=408) + PTF Operator [PTF_302] (rows=2386410 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col4, _col3, _col2"}] - Select Operator [SEL_300] (rows=2386410 width=408) + Select Operator [SEL_301] (rows=2386410 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_299] PartitionCols:_col4, _col3, _col2 - Group By Operator [GBY_297] (rows=2386410 width=408) + Group By Operator [GBY_298] (rows=2386410 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] + SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_92] (rows=87441185 width=408) + Group By Operator [GBY_22] (rows=87441185 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col8, _col10, _col11 - Merge Join Operator [MERGEJOIN_277] (rows=87441185 width=406) - Conds:RS_88._col2=RS_296._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] + Merge Join Operator [MERGEJOIN_272] (rows=87441185 width=406) + Conds:RS_18._col2=RS_297._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_296] + SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_295] (rows=462000 width=194) + Select Operator [SEL_296] (rows=462000 width=194) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_294] (rows=462000 width=194) + Filter Operator [FIL_295] (rows=462000 width=194) predicate:(i_category is not null and i_brand is not null) - TableScan [TS_79] (rows=462000 width=194) + TableScan [TS_9] (rows=462000 width=194) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] + SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_276] (rows=87441185 width=220) - Conds:RS_85._col1=RS_293._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_271] (rows=87441185 width=220) + Conds:RS_15._col1=RS_294._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_292] (rows=60 width=102) + Select Operator [SEL_293] (rows=60 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=60 width=102) + Filter Operator [FIL_292] (rows=60 width=102) predicate:cc_name is not null - TableScan [TS_76] (rows=60 width=102) + TableScan [TS_6] (rows=60 width=102) default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] + SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_275] (rows=87441185 width=126) - Conds:RS_290._col0=RS_282._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + Merge Join Operator [MERGEJOIN_270] (rows=87441185 width=126) + Conds:RS_291._col0=RS_283._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] + SHUFFLE [RS_283] PartitionCols:_col0 - Select Operator [SEL_281] (rows=564 width=12) + Select Operator [SEL_282] (rows=564 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=564 width=12) + Filter Operator [FIL_281] (rows=564 width=12) predicate:((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1)))) - TableScan [TS_73] (rows=73049 width=12) + TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + SHUFFLE [RS_291] PartitionCols:_col0 - Select Operator [SEL_289] (rows=285117980 width=123) + Select Operator [SEL_290] (rows=285117980 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_288] (rows=285117980 width=123) - predicate:(cs_sold_date_sk is not null and cs_call_center_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_70] (rows=287989836 width=123) + Filter Operator [FIL_289] (rows=285117980 width=123) + predicate:(cs_sold_date_sk is not null and cs_call_center_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) + BROADCAST [RS_288] + Group By Operator [GBY_287] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) + SHUFFLE [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=564 width=4) + Select Operator [SEL_284] (rows=564 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_281] + Please refer to the previous Select Operator [SEL_282] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] + SHUFFLE [RS_108] PartitionCols:_col5, _col6, _col12, _col7 - Merge Join Operator [MERGEJOIN_278] (rows=397735 width=636) - Conds:RS_308._col0, _col1, _col4, _col2=RS_319._col0, _col1, _col7, _col2(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_279] (rows=397735 width=636) + Conds:RS_309._col0, _col1, _col4, _col2=RS_320._col0, _col1, _col7, _col2(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] + SHUFFLE [RS_309] PartitionCols:_col0, _col1, _col4, _col2 - Select Operator [SEL_306] (rows=2386410 width=404) + Select Operator [SEL_307] (rows=2386410 width=404) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_304] (rows=2386410 width=408) + Filter Operator [FIL_305] (rows=2386410 width=408) predicate:rank_window_0 is not null - PTF Operator [PTF_302] (rows=2386410 width=408) + PTF Operator [PTF_303] (rows=2386410 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS LAST, _col1 ASC NULLS LAST","partition by:":"_col4, _col3, _col2"}] - Please refer to the previous Select Operator [SEL_300] + Please refer to the previous Select Operator [SEL_301] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_320] PartitionCols:_col0, _col1, _col7, _col2 - Select Operator [SEL_318] (rows=397735 width=524) + Select Operator [SEL_319] (rows=397735 width=524) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_317] (rows=397735 width=524) + Filter Operator [FIL_318] (rows=397735 width=524) predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (false) END - Select Operator [SEL_316] (rows=795470 width=520) + Select Operator [SEL_317] (rows=795470 width=520) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_315] (rows=795470 width=520) + Filter Operator [FIL_316] (rows=795470 width=520) predicate:((_col0 > 0) and rank_window_1 is not null and (_col1 = 2000)) - PTF Operator [PTF_314] (rows=2386410 width=520) + PTF Operator [PTF_315] (rows=2386410 width=520) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS LAST, _col2 ASC NULLS LAST","partition by:":"_col5, _col4, _col3"}] - Select Operator [SEL_313] (rows=2386410 width=520) + Select Operator [SEL_314] (rows=2386410 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + SHUFFLE [RS_313] PartitionCols:_col4, _col3, _col2 - Select Operator [SEL_311] (rows=2386410 width=408) + Select Operator [SEL_312] (rows=2386410 width=408) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_310] (rows=2386410 width=408) + PTF Operator [PTF_311] (rows=2386410 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2, _col0"}] - Select Operator [SEL_309] (rows=2386410 width=408) + Select Operator [SEL_310] (rows=2386410 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] + SHUFFLE [RS_300] PartitionCols:_col4, _col3, _col2, _col0 - Please refer to the previous Group By Operator [GBY_297] + Please refer to the previous Group By Operator [GBY_298] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out index b1593b39e2..f2cb856498 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[407][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[413][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -142,254 +142,254 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 24 <- Reducer 9 (BROADCAST_EDGE) -Map 26 <- Reducer 13 (BROADCAST_EDGE) -Map 27 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 11 <- Map 25 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 27 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 15 <- Map 25 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 20 <- Map 23 (CUSTOM_SIMPLE_EDGE), Reducer 19 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 23 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE) +Map 26 <- Reducer 14 (BROADCAST_EDGE) +Map 27 <- Reducer 18 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 27 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 19 (CUSTOM_SIMPLE_EDGE), Reducer 24 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 25 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_471] - Limit [LIM_470] (rows=1 width=884) + Reducer 7 vectorized + File Output Operator [FS_477] + Limit [LIM_476] (rows=1 width=884) Number of rows:100 - Select Operator [SEL_469] (rows=1 width=884) + Select Operator [SEL_475] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_157] - Select Operator [SEL_156] (rows=1 width=884) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_163] + Select Operator [SEL_162] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_263] (rows=1 width=660) + Top N Key Operator [TNK_269] (rows=1 width=660) keys:_col0, _col3,top n:100 - Filter Operator [FIL_152] (rows=1 width=660) + Filter Operator [FIL_158] (rows=1 width=660) predicate:(_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) - Merge Join Operator [MERGEJOIN_423] (rows=384 width=660) - Conds:RS_149._col0=RS_468._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_468] + Merge Join Operator [MERGEJOIN_429] (rows=384 width=660) + Conds:RS_155._col0=RS_474._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_474] PartitionCols:_col0 - Select Operator [SEL_467] (rows=15768 width=436) + Select Operator [SEL_473] (rows=15768 width=436) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_466] (rows=15768 width=212) + Group By Operator [GBY_472] (rows=15768 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_141] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_147] PartitionCols:_col0 - Group By Operator [GBY_140] (rows=15768 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 - Merge Join Operator [MERGEJOIN_421] (rows=31537 width=100) - Conds:RS_136._col4=RS_451._col0(Inner),Output:["_col5","_col7"] + Group By Operator [GBY_146] (rows=15768 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_427] (rows=31537 width=100) + Conds:RS_142._col1=RS_457._col0(Inner),Output:["_col2","_col7"] <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_451] + SHUFFLE [RS_457] PartitionCols:_col0 - Select Operator [SEL_448] (rows=462000 width=104) + Select Operator [SEL_454] (rows=462000 width=104) Output:["_col0","_col1"] - TableScan [TS_32] (rows=462000 width=104) + TableScan [TS_37] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_136] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_420] (rows=31537 width=4) - Conds:RS_133._col0=RS_465._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_133] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_142] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_426] (rows=31537 width=4) + Conds:RS_471._col0=RS_140._col0(Inner),Output:["_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_140] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_409] (rows=2 width=4) - Conds:RS_426._col1=RS_442._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_426] + Merge Join Operator [MERGEJOIN_415] (rows=2 width=4) + Conds:RS_432._col1=RS_448._col0(Inner),Output:["_col0"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_432] PartitionCols:_col1 - Select Operator [SEL_425] (rows=73049 width=98) + Select Operator [SEL_431] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_424] (rows=73049 width=98) + Filter Operator [FIL_430] (rows=73049 width=98) predicate:d_date is not null - TableScan [TS_0] (rows=73049 width=98) + TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_442] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_448] PartitionCols:_col0 - Group By Operator [GBY_441] (rows=2 width=94) + Group By Operator [GBY_447] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_26] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_25] (rows=2 width=94) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_408] (rows=5 width=94) - Conds:RS_21._col1=RS_439._col1(Inner),Output:["_col2"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_439] + Group By Operator [GBY_29] (rows=2 width=94) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_414] (rows=5 width=94) + Conds:RS_437._col1=RS_26._col1(Inner),Output:["_col0"] + <-Map 19 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_437] PartitionCols:_col1 - Select Operator [SEL_437] (rows=73049 width=98) + Select Operator [SEL_435] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_435] (rows=73049 width=98) + Filter Operator [FIL_433] (rows=73049 width=98) predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_15] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_21] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_407] (rows=1 width=4) + Merge Join Operator [MERGEJOIN_413] (rows=1 width=4) Conds:(Inner),Output:["_col1"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_440] - Select Operator [SEL_438] (rows=1 width=4) + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_438] + Select Operator [SEL_436] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_436] (rows=1 width=98) + Filter Operator [FIL_434] (rows=1 width=98) predicate:((d_date = '1998-02-19') and d_week_seq is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_434] - Select Operator [SEL_433] (rows=1 width=8) - Filter Operator [FIL_432] (rows=1 width=8) + Please refer to the previous TableScan [TS_6] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_446] + Select Operator [SEL_445] (rows=1 width=8) + Filter Operator [FIL_444] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_431] (rows=1 width=8) + Group By Operator [GBY_443] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_430] - Group By Operator [GBY_429] (rows=1 width=8) + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_442] + Group By Operator [GBY_441] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_428] (rows=1 width=94) - Filter Operator [FIL_427] (rows=1 width=94) + Select Operator [SEL_440] (rows=1 width=94) + Filter Operator [FIL_439] (rows=1 width=94) predicate:(d_date = '1998-02-19') - TableScan [TS_3] (rows=73049 width=94) + TableScan [TS_9] (rows=73049 width=94) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_465] + SHUFFLE [RS_471] PartitionCols:_col0 - Select Operator [SEL_464] (rows=143966864 width=119) + Select Operator [SEL_470] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_463] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_133_date_dim_d_date_sk_min) AND DynamicValue(RS_133_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_133_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_125] (rows=144002668 width=119) + Filter Operator [FIL_469] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_140_date_dim_d_date_sk_min) AND DynamicValue(RS_140_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_140_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_100] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_462] - Group By Operator [GBY_461] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_468] + Group By Operator [GBY_467] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_387] - Group By Operator [GBY_386] (rows=1 width=12) + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_385] (rows=2 width=4) + Select Operator [SEL_366] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_409] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_149] + Please refer to the previous Merge Join Operator [MERGEJOIN_415] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_155] PartitionCols:_col0 - Filter Operator [FIL_147] (rows=384 width=324) + Filter Operator [FIL_153] (rows=384 width=324) predicate:(_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) - Merge Join Operator [MERGEJOIN_422] (rows=31163 width=324) - Conds:RS_453._col0=RS_460._col0(Inner),Output:["_col0","_col1","_col3"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_460] + Merge Join Operator [MERGEJOIN_428] (rows=31163 width=324) + Conds:RS_459._col0=RS_466._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_466] PartitionCols:_col0 - Group By Operator [GBY_459] (rows=60249 width=212) + Group By Operator [GBY_465] (rows=60249 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_93] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_97] PartitionCols:_col0 - Group By Operator [GBY_92] (rows=60249 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 - Merge Join Operator [MERGEJOIN_416] (rows=120498 width=100) - Conds:RS_88._col4=RS_450._col0(Inner),Output:["_col5","_col7"] + Group By Operator [GBY_96] (rows=60249 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_422] (rows=120498 width=100) + Conds:RS_92._col1=RS_456._col0(Inner),Output:["_col2","_col7"] <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_450] + SHUFFLE [RS_456] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_448] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_415] (rows=120498 width=4) - Conds:RS_85._col0=RS_458._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_85] + Please refer to the previous Select Operator [SEL_454] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_92] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_421] (rows=120498 width=4) + Conds:RS_464._col0=RS_90._col0(Inner),Output:["_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_90] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_409] + Please refer to the previous Merge Join Operator [MERGEJOIN_415] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_458] + SHUFFLE [RS_464] PartitionCols:_col0 - Select Operator [SEL_457] (rows=550076554 width=114) + Select Operator [SEL_463] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_456] (rows=550076554 width=114) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_85_date_dim_d_date_sk_min) AND DynamicValue(RS_85_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_85_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_77] (rows=575995635 width=114) + Filter Operator [FIL_462] (rows=550076554 width=114) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_50] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_455] - Group By Operator [GBY_454] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_461] + Group By Operator [GBY_460] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_322] + Group By Operator [GBY_321] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_339] (rows=2 width=4) + Select Operator [SEL_320] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_409] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_453] + Please refer to the previous Merge Join Operator [MERGEJOIN_415] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_459] PartitionCols:_col0 - Group By Operator [GBY_452] (rows=31163 width=212) + Group By Operator [GBY_458] (rows=31163 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_45] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=31163 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 - Merge Join Operator [MERGEJOIN_411] (rows=62327 width=100) - Conds:RS_40._col4=RS_449._col0(Inner),Output:["_col5","_col7"] + Group By Operator [GBY_46] (rows=31163 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_417] (rows=62327 width=100) + Conds:RS_42._col1=RS_455._col0(Inner),Output:["_col2","_col7"] <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_449] + SHUFFLE [RS_455] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_448] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_410] (rows=62327 width=4) - Conds:RS_37._col0=RS_447._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_37] + Please refer to the previous Select Operator [SEL_454] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_416] (rows=62327 width=4) + Conds:RS_453._col0=RS_40._col0(Inner),Output:["_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_409] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_447] + Please refer to the previous Merge Join Operator [MERGEJOIN_415] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_453] PartitionCols:_col0 - Select Operator [SEL_446] (rows=286549727 width=119) + Select Operator [SEL_452] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_445] (rows=286549727 width=119) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_37_date_dim_d_date_sk_min) AND DynamicValue(RS_37_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_37_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_29] (rows=287989836 width=119) + Filter Operator [FIL_451] (rows=286549727 width=119) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_444] - Group By Operator [GBY_443] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_450] + Group By Operator [GBY_449] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_291] - Group By Operator [GBY_290] (rows=1 width=12) + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_272] + Group By Operator [GBY_271] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_289] (rows=2 width=4) + Select Operator [SEL_270] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_409] + Please refer to the previous Merge Join Operator [MERGEJOIN_415] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out index f7c7260077..50871161cc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out @@ -95,142 +95,142 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 13 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_208] - Limit [LIM_207] (rows=100 width=976) + Reducer 4 vectorized + File Output Operator [FS_210] + Limit [LIM_209] (rows=100 width=976) Number of rows:100 - Select Operator [SEL_206] (rows=1012347 width=976) + Select Operator [SEL_208] (rows=1012347 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_57] - Select Operator [SEL_56] (rows=1012347 width=976) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_59] + Select Operator [SEL_58] (rows=1012347 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_95] (rows=1012347 width=1648) - keys:_col12, _col11, _col0,top n:100 - Merge Join Operator [MERGEJOIN_182] (rows=1012347 width=1648) - Conds:RS_53._col11, _col0=RS_54._col1, (_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_54] + Top N Key Operator [TNK_97] (rows=1012347 width=1648) + keys:_col2, _col1, _col3,top n:100 + Merge Join Operator [MERGEJOIN_184] (rows=1012347 width=1648) + Conds:RS_55._col1, _col3=RS_56._col1, (_col0 - 52)(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col1, (_col0 - 52) - Select Operator [SEL_46] (rows=28847 width=776) + Select Operator [SEL_51] (rows=28847 width=776) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_181] (rows=28847 width=776) - Conds:RS_43._col1=RS_205._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + Merge Join Operator [MERGEJOIN_183] (rows=28847 width=776) + Conds:RS_207._col0=RS_49._col1(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_205] + SHUFFLE [RS_207] PartitionCols:_col0 - Select Operator [SEL_204] (rows=1704 width=104) + Select Operator [SEL_206] (rows=1704 width=104) Output:["_col0","_col1"] - TableScan [TS_38] (rows=1704 width=104) + TableScan [TS_24] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_43] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_49] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=28847 width=676) - Conds:RS_203._col0=RS_199._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] + Merge Join Operator [MERGEJOIN_182] (rows=28847 width=676) + Conds:RS_205._col0=RS_203._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] PartitionCols:_col0 - Select Operator [SEL_197] (rows=317 width=4) + Select Operator [SEL_201] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_195] (rows=317 width=8) + Filter Operator [FIL_199] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) - TableScan [TS_15] (rows=73049 width=8) + TableScan [TS_17] (rows=73049 width=8) default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] PartitionCols:_col0 - Group By Operator [GBY_202] (rows=1196832 width=679) + Group By Operator [GBY_204] (rows=1196832 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_32] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=525329897 width=679) + Group By Operator [GBY_37] (rows=525329897 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_29] (rows=525329897 width=138) + Select Operator [SEL_35] (rows=525329897 width=138) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_179] (rows=525329897 width=138) - Conds:RS_186._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] - PartitionCols:_col0 - Select Operator [SEL_184] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_183] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + Merge Join Operator [MERGEJOIN_181] (rows=525329897 width=138) + Conds:RS_190._col0=RS_195._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_189] (rows=73049 width=36) + Select Operator [SEL_193] (rows=73049 width=36) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_187] (rows=73049 width=99) + Filter Operator [FIL_191] (rows=73049 width=99) predicate:d_week_seq is not null - TableScan [TS_3] (rows=73049 width=99) + TableScan [TS_5] (rows=73049 width=99) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col11, _col0 - Merge Join Operator [MERGEJOIN_178] (rows=28847 width=976) - Conds:RS_50._col1=RS_201._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_188] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_187] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_2] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_180] (rows=28847 width=976) + Conds:RS_186._col0=RS_53._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] PartitionCols:_col0 - Select Operator [SEL_200] (rows=1704 width=192) + Select Operator [SEL_185] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - TableScan [TS_18] (rows=1704 width=192) + TableScan [TS_0] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_50] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=28847 width=788) - Conds:RS_193._col0=RS_198._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_179] (rows=28847 width=788) + Conds:RS_197._col0=RS_202._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_202] PartitionCols:_col0 - Select Operator [SEL_196] (rows=317 width=4) + Select Operator [SEL_200] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_194] (rows=317 width=8) + Filter Operator [FIL_198] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + Please refer to the previous TableScan [TS_17] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] PartitionCols:_col0 - Group By Operator [GBY_192] (rows=1196832 width=791) + Group By Operator [GBY_196] (rows=1196832 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=525329897 width=791) + Group By Operator [GBY_13] (rows=525329897 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_9] (rows=525329897 width=142) + Select Operator [SEL_11] (rows=525329897 width=142) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_176] (rows=525329897 width=142) - Conds:RS_185._col0=RS_190._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_184] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] + Merge Join Operator [MERGEJOIN_178] (rows=525329897 width=142) + Conds:RS_189._col0=RS_194._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_188] (rows=73049 width=36) + Select Operator [SEL_192] (rows=73049 width=36) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Please refer to the previous Filter Operator [FIL_187] + Please refer to the previous Filter Operator [FIL_191] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_188] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out index 0c822541da..5743de0ce3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[170][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[171][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a @@ -65,11 +65,11 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 7 (BROADCAST_EDGE) -Map 10 <- Reducer 13 (BROADCAST_EDGE) -Map 14 <- Reducer 16 (BROADCAST_EDGE) +Map 10 <- Reducer 16 (BROADCAST_EDGE) +Map 12 <- Reducer 14 (BROADCAST_EDGE) Map 15 <- Reducer 8 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE) Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (BROADCAST_EDGE), Map 9 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) @@ -83,147 +83,147 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_236] - Limit [LIM_235] (rows=1 width=94) + File Output Operator [FS_238] + Limit [LIM_237] (rows=1 width=94) Number of rows:100 - Select Operator [SEL_234] (rows=1 width=94) + Select Operator [SEL_236] (rows=1 width=94) Output:["_col0","_col1"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] - Top N Key Operator [TNK_232] (rows=1 width=94) + SHUFFLE [RS_235] + Top N Key Operator [TNK_234] (rows=1 width=94) keys:_col1,top n:100 - Filter Operator [FIL_231] (rows=1 width=94) + Filter Operator [FIL_233] (rows=1 width=94) predicate:(_col1 >= 10L) - Group By Operator [GBY_230] (rows=1 width=94) + Group By Operator [GBY_232] (rows=1 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_69] + SHUFFLE [RS_70] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=1 width=94) + Group By Operator [GBY_69] (rows=1 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col4 - Map Join Operator [MAPJOIN_176] (rows=36482 width=86) - Conds:MAPJOIN_175._col5=RS_221._col0(Inner),Output:["_col4"] + Map Join Operator [MAPJOIN_177] (rows=36482 width=86) + Conds:MAPJOIN_176._col5=RS_210._col0(Inner),Output:["_col4"] <-Map 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_221] + BROADCAST [RS_210] PartitionCols:_col0 - Map Join Operator [MAPJOIN_220] (rows=660 width=4) - Conds:SEL_219._col1=RS_217._col0(Inner),Output:["_col0"] + Map Join Operator [MAPJOIN_209] (rows=660 width=4) + Conds:SEL_208._col1=RS_206._col0(Inner),Output:["_col0"] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] + BROADCAST [RS_206] PartitionCols:_col0 - Group By Operator [GBY_216] (rows=25 width=4) + Group By Operator [GBY_205] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] + SHUFFLE [RS_190] PartitionCols:_col0 - Group By Operator [GBY_187] (rows=25 width=4) + Group By Operator [GBY_188] (rows=25 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_185] (rows=50 width=12) + Select Operator [SEL_186] (rows=50 width=12) Output:["d_month_seq"] - Filter Operator [FIL_183] (rows=50 width=12) + Filter Operator [FIL_184] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Select Operator [SEL_219] (rows=73049 width=8) + <-Select Operator [SEL_208] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_218] (rows=73049 width=8) + Filter Operator [FIL_207] (rows=73049 width=8) predicate:d_month_seq is not null - TableScan [TS_41] (rows=73049 width=8) + TableScan [TS_42] (rows=73049 width=8) default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map Join Operator [MAPJOIN_175] (rows=36482 width=86) - Conds:MERGEJOIN_171._col0=RS_62._col2(Inner),Output:["_col4","_col5"] + <-Map Join Operator [MAPJOIN_176] (rows=36482 width=86) + Conds:MERGEJOIN_172._col0=RS_63._col2(Inner),Output:["_col4","_col5"] <-Reducer 11 [BROADCAST_EDGE] - BROADCAST [RS_62] + BROADCAST [RS_63] PartitionCols:_col2 - Select Operator [SEL_40] (rows=36482 width=0) - Output:["_col0","_col2"] - Merge Join Operator [MERGEJOIN_173] (rows=36482 width=0) - Conds:RS_215._col0=RS_229._col1(Inner),Output:["_col5","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - PartitionCols:_col0 - Filter Operator [FIL_214] (rows=153611 width=227) + Merge Join Operator [MERGEJOIN_174] (rows=36482 width=0) + Conds:RS_218._col1=RS_231._col0(Inner),Output:["_col0","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] + PartitionCols:_col1 + Select Operator [SEL_217] (rows=525327388 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_216] (rows=525327388 width=11) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_66_d_d_date_sk_min) AND DynamicValue(RS_66_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_66_d_d_date_sk_bloom_filter))) + TableScan [TS_19] (rows=575995635 width=11) + default@store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_215] + Group By Operator [GBY_214] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=153611 width=227) + Output:["_col0"] + Filter Operator [FIL_229] (rows=153611 width=227) predicate:(_col1 > _col4) - Map Join Operator [MAPJOIN_213] (rows=460833 width=227) - Conds:SEL_212._col2=RS_210._col0(Inner),Output:["_col0","_col1","_col4"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_210] + Map Join Operator [MAPJOIN_228] (rows=460833 width=227) + Conds:SEL_227._col2=RS_225._col0(Inner),Output:["_col0","_col1","_col4"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_225] PartitionCols:_col0 - Select Operator [SEL_209] (rows=10 width=202) + Select Operator [SEL_224] (rows=10 width=202) Output:["_col0","_col1"] - Filter Operator [FIL_208] (rows=10 width=210) + Filter Operator [FIL_223] (rows=10 width=210) predicate:CAST( CAST( (_col1 / _col2) AS decimal(11,6)) AS decimal(16,6)) is not null - Group By Operator [GBY_207] (rows=10 width=210) + Group By Operator [GBY_222] (rows=10 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] PartitionCols:_col0 - Group By Operator [GBY_205] (rows=10 width=210) + Group By Operator [GBY_220] (rows=10 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category - Filter Operator [FIL_204] (rows=462000 width=201) + Filter Operator [FIL_219] (rows=462000 width=201) predicate:i_category is not null - TableScan [TS_22] (rows=462000 width=201) + TableScan [TS_25] (rows=462000 width=201) default@item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_current_price","i_category"] - <-Select Operator [SEL_212] (rows=460833 width=205) + <-Select Operator [SEL_227] (rows=460833 width=205) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_211] (rows=460833 width=205) + Filter Operator [FIL_226] (rows=460833 width=205) predicate:(i_current_price is not null and i_category is not null) - TableScan [TS_19] (rows=462000 width=205) + TableScan [TS_22] (rows=462000 width=205) default@item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_category"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] - PartitionCols:_col1 - Select Operator [SEL_228] (rows=525327388 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_227] (rows=525327388 width=11) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_65_d_d_date_sk_min) AND DynamicValue(RS_65_d_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_65_d_d_date_sk_bloom_filter))) - TableScan [TS_30] (rows=575995635 width=11) - default@store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_226] - Group By Operator [GBY_225] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Merge Join Operator [MERGEJOIN_171] (rows=80000000 width=90) - Conds:RS_201._col1=RS_203._col0(Inner),Output:["_col0","_col4"] + <-Merge Join Operator [MERGEJOIN_172] (rows=80000000 width=90) + Conds:RS_202._col1=RS_204._col0(Inner),Output:["_col0","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + SHUFFLE [RS_202] PartitionCols:_col1 - Map Join Operator [MAPJOIN_200] (rows=80000000 width=8) + Map Join Operator [MAPJOIN_201] (rows=80000000 width=8) Conds:(Inner),Output:["_col0","_col1"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_197] - Select Operator [SEL_196] (rows=1 width=8) - Filter Operator [FIL_195] (rows=1 width=8) + BROADCAST [RS_198] + Select Operator [SEL_197] (rows=1 width=8) + Filter Operator [FIL_196] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_194] (rows=1 width=8) + Group By Operator [GBY_195] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_193] - Group By Operator [GBY_192] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_194] + Group By Operator [GBY_193] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_191] (rows=25 width=4) - Group By Operator [GBY_190] (rows=25 width=4) + Select Operator [SEL_192] (rows=25 width=4) + Group By Operator [GBY_191] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] + SHUFFLE [RS_189] PartitionCols:_col0 - Group By Operator [GBY_186] (rows=25 width=4) + Group By Operator [GBY_187] (rows=25 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_184] (rows=50 width=12) + Select Operator [SEL_185] (rows=50 width=12) Output:["d_month_seq"] - Filter Operator [FIL_182] (rows=50 width=12) + Filter Operator [FIL_183] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 2)) Please refer to the previous TableScan [TS_3] - <-Select Operator [SEL_199] (rows=80000000 width=8) + <-Select Operator [SEL_200] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_198] (rows=80000000 width=8) + Filter Operator [FIL_199] (rows=80000000 width=8) predicate:c_current_addr_sk is not null TableScan [TS_0] (rows=80000000 width=8) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + SHUFFLE [RS_204] PartitionCols:_col0 - Select Operator [SEL_202] (rows=40000000 width=90) + Select Operator [SEL_203] (rows=40000000 width=90) Output:["_col0","_col1"] TableScan [TS_17] (rows=40000000 width=90) default@customer_address,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out index a711f8a5a5..31feb2f16a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out @@ -169,27 +169,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 18 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Map 27 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 20 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -197,230 +197,224 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_364] - Limit [LIM_363] (rows=100 width=212) + File Output Operator [FS_367] + Limit [LIM_366] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_362] (rows=1717 width=212) + Select Operator [SEL_365] (rows=1717 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_361] - Top N Key Operator [TNK_360] (rows=1717 width=212) + SHUFFLE [RS_364] + Top N Key Operator [TNK_363] (rows=1717 width=212) keys:_col0, _col1,top n:100 - Group By Operator [GBY_359] (rows=1717 width=212) + Group By Operator [GBY_362] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_382] + Reduce Output Operator [RS_385] PartitionCols:_col0 - Group By Operator [GBY_381] (rows=1717 width=212) + Group By Operator [GBY_384] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_380] (rows=5151 width=212) + Top N Key Operator [TNK_383] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_379] (rows=1717 width=212) + Group By Operator [GBY_382] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_106] + SHUFFLE [RS_109] PartitionCols:_col0 - Group By Operator [GBY_105] (rows=1717 width=212) + Group By Operator [GBY_108] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_307] (rows=379339 width=201) - Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_310] (rows=379339 width=201) + Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_101] + SHUFFLE [RS_104] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_296] (rows=34340 width=104) - Conds:RS_324._col1=RS_330._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_299] (rows=34340 width=104) + Conds:RS_327._col1=RS_333._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + SHUFFLE [RS_327] PartitionCols:_col1 - Select Operator [SEL_323] (rows=462000 width=104) + Select Operator [SEL_326] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + SHUFFLE [RS_333] PartitionCols:_col0 - Group By Operator [GBY_329] (rows=23100 width=100) + Group By Operator [GBY_332] (rows=23100 width=100) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] + SHUFFLE [RS_331] PartitionCols:_col0 - Group By Operator [GBY_327] (rows=23100 width=100) + Group By Operator [GBY_330] (rows=23100 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_326] (rows=46200 width=190) + Select Operator [SEL_329] (rows=46200 width=190) Output:["i_item_id"] - Filter Operator [FIL_325] (rows=46200 width=190) + Filter Operator [FIL_328] (rows=46200 width=190) predicate:(i_category = 'Children') TableScan [TS_2] (rows=462000 width=190) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_105] PartitionCols:_col2 - Select Operator [SEL_97] (rows=788222 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_304] (rows=788222 width=110) - Conds:RS_94._col2=RS_354._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] - PartitionCols:_col0 - Select Operator [SEL_351] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_350] (rows=8000000 width=112) - predicate:(ca_gmt_offset = -6) - TableScan [TS_15] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_303] (rows=3941109 width=118) - Conds:RS_378._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - PartitionCols:_col0 - Select Operator [SEL_332] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_331] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 9)) - TableScan [TS_12] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_378] - PartitionCols:_col0 - Select Operator [SEL_377] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_376] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_82] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_375] - Group By Operator [GBY_374] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_341] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_338] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_332] + Merge Join Operator [MERGEJOIN_307] (rows=788222 width=110) + Conds:RS_338._col0=RS_98._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_338] + PartitionCols:_col0 + Select Operator [SEL_335] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_334] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -6) + TableScan [TS_9] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_306] (rows=3941109 width=118) + Conds:RS_381._col0=RS_345._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_345] + PartitionCols:_col0 + Select Operator [SEL_340] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_339] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 9)) + TableScan [TS_15] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_381] + PartitionCols:_col0 + Select Operator [SEL_380] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_379] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_94_date_dim_d_date_sk_min) AND DynamicValue(RS_94_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_94_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_87] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_378] + Group By Operator [GBY_377] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_352] + Group By Operator [GBY_349] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_346] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_340] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_358] + Reduce Output Operator [RS_361] PartitionCols:_col0 - Group By Operator [GBY_357] (rows=1717 width=212) + Group By Operator [GBY_360] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_356] (rows=5151 width=212) + Top N Key Operator [TNK_359] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_355] (rows=1717 width=212) + Group By Operator [GBY_358] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_33] + SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_32] (rows=1717 width=212) + Group By Operator [GBY_33] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=1384530 width=100) - Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_308] (rows=1384530 width=100) + Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_296] - <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_29] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_299] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col2 - Select Operator [SEL_24] (rows=2876890 width=4) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_298] (rows=2876890 width=4) - Conds:RS_21._col2=RS_352._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_351] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_297] (rows=14384447 width=4) - Conds:RS_349._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_332] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] - PartitionCols:_col0 - Select Operator [SEL_348] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_347] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] - Group By Operator [GBY_339] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_334] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_332] + Merge Join Operator [MERGEJOIN_301] (rows=2876890 width=4) + Conds:RS_336._col0=RS_23._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_336] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_335] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_300] (rows=14384447 width=4) + Conds:RS_357._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_340] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_357] + PartitionCols:_col0 + Select Operator [SEL_356] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_355] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_12] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_354] + Group By Operator [GBY_353] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_350] + Group By Operator [GBY_347] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_342] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_340] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_373] + Reduce Output Operator [RS_376] PartitionCols:_col0 - Group By Operator [GBY_372] (rows=1717 width=212) + Group By Operator [GBY_375] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_371] (rows=5151 width=212) + Top N Key Operator [TNK_374] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_370] (rows=1717 width=212) + Group By Operator [GBY_373] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_69] + SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=1717 width=212) + Group By Operator [GBY_70] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_306] (rows=746132 width=100) - Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_309] (rows=746132 width=100) + Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_64] + SHUFFLE [RS_66] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_296] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_65] + Please refer to the previous Merge Join Operator [MERGEJOIN_299] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col3 - Select Operator [SEL_60] (rows=1550375 width=13) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_301] (rows=1550375 width=13) - Conds:RS_57._col1=RS_353._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_351] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_300] (rows=7751872 width=98) - Conds:RS_369._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_332] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_369] - PartitionCols:_col0 - Select Operator [SEL_368] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_367] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_45] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_343] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_336] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_332] + Merge Join Operator [MERGEJOIN_304] (rows=1550375 width=13) + Conds:RS_337._col0=RS_60._col1(Inner),Output:["_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_337] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_335] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_303] (rows=7751872 width=98) + Conds:RS_372._col0=RS_343._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_340] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_372] + PartitionCols:_col0 + Select Operator [SEL_371] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_370] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_56_date_dim_d_date_sk_min) AND DynamicValue(RS_56_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_56_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_49] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_369] + Group By Operator [GBY_368] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_351] + Group By Operator [GBY_348] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_344] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_340] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out index e36adb5e9e..5519ef6de3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out @@ -265,68 +265,68 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 45 (BROADCAST_EDGE) -Map 35 <- Reducer 20 (BROADCAST_EDGE) -Map 50 <- Reducer 47 (BROADCAST_EDGE) +Map 31 <- Reducer 37 (BROADCAST_EDGE) +Map 40 <- Reducer 35 (BROADCAST_EDGE) +Map 50 <- Reducer 39 (BROADCAST_EDGE) Map 54 <- Reducer 53 (BROADCAST_EDGE) -Reducer 10 <- Map 44 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 44 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 48 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 48 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 49 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 49 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 21 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (SIMPLE_EDGE), Reducer 40 (SIMPLE_EDGE) -Reducer 24 <- Map 41 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 25 <- Map 42 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Map 43 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Map 43 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 28 <- Map 44 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 44 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 44 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 48 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 48 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 32 <- Map 49 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 33 <- Map 49 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (SIMPLE_EDGE) -Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) -Reducer 37 <- Reducer 36 (SIMPLE_EDGE) -Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 40 <- Reducer 39 (SIMPLE_EDGE) -Reducer 45 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 46 <- Map 44 (SIMPLE_EDGE), Reducer 51 (SIMPLE_EDGE) -Reducer 47 <- Map 44 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 10 <- Map 49 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 49 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 16 <- Map 46 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 47 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 47 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 36 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 20 <- Map 36 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 48 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 48 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 49 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 49 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE) +Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 29 <- Map 26 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) +Reducer 3 <- Map 46 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Reducer 29 (SIMPLE_EDGE), Reducer 45 (SIMPLE_EDGE) +Reducer 32 <- Map 31 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 33 <- Map 36 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 35 <- Map 34 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Map 36 (CUSTOM_SIMPLE_EDGE) +Reducer 38 <- Map 36 (SIMPLE_EDGE), Reducer 51 (SIMPLE_EDGE) +Reducer 39 <- Map 36 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 47 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) +Reducer 42 <- Reducer 41 (SIMPLE_EDGE) +Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) +Reducer 45 <- Reducer 44 (SIMPLE_EDGE) +Reducer 5 <- Map 47 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 51 <- Map 50 (SIMPLE_EDGE), Map 52 (SIMPLE_EDGE) Reducer 53 <- Map 52 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 41 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 42 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 43 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 43 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 6 <- Map 36 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 36 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 48 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 48 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 18 vectorized + Reducer 14 vectorized File Output Operator [FS_1079] Select Operator [SEL_1078] (rows=104628491644 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_201] - Select Operator [SEL_200] (rows=104628491644 width=1694) + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_205] + Select Operator [SEL_204] (rows=104628491644 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_199] (rows=104628491644 width=1694) + Filter Operator [FIL_203] (rows=104628491644 width=1694) predicate:(_col3 <= _col19) Merge Join Operator [MERGEJOIN_977] (rows=313885474933 width=1694) Conds:RS_1052._col1, _col0, _col2=RS_1077._col2, _col1, _col3(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] - <-Reducer 16 [SIMPLE_EDGE] vectorized + <-Reducer 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1052] PartitionCols:_col1, _col0, _col2 Select Operator [SEL_1051] (rows=21304422 width=525) @@ -337,234 +337,234 @@ Stage-0 Output:["_col0","_col1","_col2","_col13","_col14","_col15","_col16"] Group By Operator [GBY_1048] (rows=21304422 width=1255) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_93] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_95] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Group By Operator [GBY_92] (rows=21304422 width=1255) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"],aggregations:["count()","sum(_col8)","sum(_col9)","sum(_col10)"],keys:_col24, _col11, _col25, _col29, _col31, _col37, _col38, _col39, _col40, _col42, _col43, _col44, _col45 + Group By Operator [GBY_94] (rows=21304422 width=1255) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"],aggregations:["count()","sum(_col16)","sum(_col17)","sum(_col18)"],keys:_col24, _col19, _col25, _col29, _col31, _col37, _col38, _col39, _col40, _col42, _col43, _col44, _col45 Merge Join Operator [MERGEJOIN_961] (rows=21304422 width=1048) - Conds:RS_88._col17=RS_1045._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] + Conds:RS_90._col5=RS_1044._col0(Inner),Output:["_col16","_col17","_col18","_col19","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1045] + SHUFFLE [RS_1044] PartitionCols:_col0 Select Operator [SEL_1043] (rows=40000000 width=365) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_44] (rows=40000000 width=365) - default@customer_address,ad1,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col17 + TableScan [TS_60] (rows=40000000 width=365) + default@customer_address,ad2,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col5 Merge Join Operator [MERGEJOIN_960] (rows=21304422 width=691) - Conds:RS_85._col5=RS_1044._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col17","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] + Conds:RS_87._col13=RS_1045._col0(Inner),Output:["_col5","_col16","_col17","_col18","_col19","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1044] + SHUFFLE [RS_1045] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1043] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col5 - Filter Operator [FIL_84] (rows=21304422 width=502) + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_87] + PartitionCols:_col13 + Filter Operator [FIL_86] (rows=21304422 width=502) predicate:(_col33 <> _col35) Merge Join Operator [MERGEJOIN_959] (rows=21304422 width=502) - Conds:RS_81._col15=RS_1040._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col17","_col24","_col25","_col29","_col31","_col33","_col35"] + Conds:RS_83._col3=RS_1040._col0(Inner),Output:["_col5","_col13","_col16","_col17","_col18","_col19","_col24","_col25","_col29","_col31","_col33","_col35"] <-Map 48 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1040] PartitionCols:_col0 Select Operator [SEL_1038] (rows=1861800 width=89) Output:["_col0","_col1"] - TableScan [TS_40] (rows=1861800 width=89) + TableScan [TS_54] (rows=1861800 width=89) default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_81] - PartitionCols:_col15 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col3 Merge Join Operator [MERGEJOIN_958] (rows=21007353 width=418) - Conds:RS_78._col3=RS_1039._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col15","_col17","_col24","_col25","_col29","_col31","_col33"] + Conds:RS_80._col11=RS_1039._col0(Inner),Output:["_col3","_col5","_col13","_col16","_col17","_col18","_col19","_col24","_col25","_col29","_col31","_col33"] <-Map 48 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1039] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1038] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_78] - PartitionCols:_col3 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col11 Merge Join Operator [MERGEJOIN_957] (rows=20714426 width=331) - Conds:RS_75._col18=RS_984._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col15","_col17","_col24","_col25","_col29","_col31"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_984] + Conds:RS_77._col6=RS_993._col0(Inner),Output:["_col3","_col5","_col11","_col13","_col16","_col17","_col18","_col19","_col24","_col25","_col29","_col31"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_993] PartitionCols:_col0 - Select Operator [SEL_978] (rows=73049 width=8) + Select Operator [SEL_987] (rows=73049 width=8) Output:["_col0","_col1"] - TableScan [TS_38] (rows=73049 width=8) - default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col18 + TableScan [TS_11] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col6 Merge Join Operator [MERGEJOIN_956] (rows=20714426 width=331) - Conds:RS_72._col19=RS_986._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col15","_col17","_col18","_col24","_col25","_col29"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_986] + Conds:RS_74._col7=RS_992._col0(Inner),Output:["_col3","_col5","_col6","_col11","_col13","_col16","_col17","_col18","_col19","_col24","_col25","_col29"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_992] PartitionCols:_col0 - Select Operator [SEL_980] (rows=73049 width=8) + Select Operator [SEL_986] (rows=73049 width=8) Output:["_col0","_col1"] - Please refer to the previous TableScan [TS_38] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_72] - PartitionCols:_col19 + Please refer to the previous TableScan [TS_11] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col7 Merge Join Operator [MERGEJOIN_955] (rows=20714426 width=330) - Conds:RS_69._col16=RS_1035._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col15","_col17","_col18","_col19","_col24","_col25"] - <-Map 43 [SIMPLE_EDGE] vectorized + Conds:RS_71._col4=RS_1035._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col18","_col19","_col24","_col25"] + <-Map 47 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1035] PartitionCols:_col0 Select Operator [SEL_1033] (rows=7200 width=4) Output:["_col0"] Filter Operator [FIL_1032] (rows=7200 width=8) predicate:hd_income_band_sk is not null - TableScan [TS_30] (rows=7200 width=8) + TableScan [TS_44] (rows=7200 width=8) default@household_demographics,hd1,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col16 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col4 Merge Join Operator [MERGEJOIN_954] (rows=20714426 width=334) - Conds:RS_66._col4=RS_1034._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] - <-Map 43 [SIMPLE_EDGE] vectorized + Conds:RS_68._col12=RS_1034._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col18","_col19","_col24","_col25"] + <-Map 47 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1034] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1033] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_66] - PartitionCols:_col4 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col12 Merge Join Operator [MERGEJOIN_953] (rows=20714426 width=336) - Conds:RS_63._col6=RS_1030._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] - <-Map 42 [SIMPLE_EDGE] vectorized + Conds:RS_65._col14=RS_1030._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col16","_col17","_col18","_col19","_col24","_col25"] + <-Map 46 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1030] PartitionCols:_col0 Select Operator [SEL_1029] (rows=1704 width=181) Output:["_col0","_col1","_col2"] Filter Operator [FIL_1028] (rows=1704 width=181) predicate:(s_store_name is not null and s_zip is not null) - TableScan [TS_27] (rows=1704 width=181) + TableScan [TS_41] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_63] - PartitionCols:_col6 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col14 Merge Join Operator [MERGEJOIN_952] (rows=20714426 width=160) - Conds:RS_60._col1, _col7=RS_1026._col0, _col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19"] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1026] + Conds:RS_979._col0, _col1=RS_63._col7, _col13(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col16","_col17","_col18","_col19"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_979] PartitionCols:_col0, _col1 - Select Operator [SEL_1025] (rows=57591150 width=8) + Select Operator [SEL_978] (rows=57591150 width=8) Output:["_col0","_col1"] - TableScan [TS_25] (rows=57591150 width=8) + TableScan [TS_0] (rows=57591150 width=8) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col1, _col7 + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col7, _col13 Merge Join Operator [MERGEJOIN_951] (rows=12564038 width=28) - Conds:RS_57._col1=RS_1024._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19"] - <-Reducer 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1024] - PartitionCols:_col0 - Select Operator [SEL_1023] (rows=13257 width=4) - Output:["_col0"] - Filter Operator [FIL_1022] (rows=13257 width=228) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1021] (rows=39773 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 36 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col0 - Group By Operator [GBY_20] (rows=6482999 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_950] (rows=183085709 width=227) - Conds:RS_1017._col0, _col1=RS_1019._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1019] - PartitionCols:_col0, _col1 - Select Operator [SEL_1018] (rows=28798881 width=120) - Output:["_col0","_col1","_col2"] - TableScan [TS_14] (rows=28798881 width=337) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1017] - PartitionCols:_col0, _col1 - Select Operator [SEL_1016] (rows=287989836 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1015] (rows=287989836 width=119) - predicate:(cs_item_sk BETWEEN DynamicValue(RS_49_item_i_item_sk_min) AND DynamicValue(RS_49_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_49_item_i_item_sk_bloom_filter))) - TableScan [TS_12] (rows=287989836 width=119) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1014] - Group By Operator [GBY_1013] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1008] - Group By Operator [GBY_1007] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1006] (rows=4667 width=4) - Output:["_col0"] - Select Operator [SEL_1004] (rows=4667 width=4) - Output:["_col0"] - Filter Operator [FIL_1003] (rows=4667 width=204) - predicate:(i_current_price BETWEEN 36 AND 45 and (i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate')) - TableScan [TS_3] (rows=462000 width=204) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col1 + Conds:RS_37._col7=RS_1027._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col7 Merge Join Operator [MERGEJOIN_949] (rows=12564038 width=28) - Conds:RS_54._col2=RS_1011._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1011] + Conds:RS_983._col0=RS_35._col2(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_983] PartitionCols:_col0 - Select Operator [SEL_1010] (rows=69376329 width=23) + Select Operator [SEL_982] (rows=69376329 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1009] (rows=69376329 width=23) + Filter Operator [FIL_981] (rows=69376329 width=23) predicate:(c_first_shipto_date_sk is not null and c_first_sales_date_sk is not null and c_current_hdemo_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) - TableScan [TS_9] (rows=80000000 width=23) + TableScan [TS_2] (rows=80000000 width=23) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_35] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_948] (rows=14487982 width=12) - Conds:RS_51._col0=RS_990._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_990] + Conds:RS_17._col0=RS_997._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_997] PartitionCols:_col0 - Select Operator [SEL_985] (rows=652 width=4) + Select Operator [SEL_991] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_979] (rows=652 width=8) + Filter Operator [FIL_985] (rows=652 width=8) predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_38] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] + Please refer to the previous TableScan [TS_11] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_947] (rows=40575792 width=205) - Conds:RS_1002._col1=RS_1005._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1005] + Conds:RS_1009._col1=RS_1012._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 34 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1012] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1004] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1002] + Select Operator [SEL_1011] (rows=4667 width=4) + Output:["_col0"] + Filter Operator [FIL_1010] (rows=4667 width=204) + predicate:(i_current_price BETWEEN 36 AND 45 and (i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate')) + TableScan [TS_8] (rows=462000 width=204) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color"] + <-Map 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1009] PartitionCols:_col1 - Select Operator [SEL_1001] (rows=417313408 width=351) + Select Operator [SEL_1008] (rows=417313408 width=351) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_1000] (rows=417313408 width=355) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_52_d1_d_date_sk_min) AND DynamicValue(RS_52_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=355) + Filter Operator [FIL_1007] (rows=417313408 width=355) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_18_d1_d_date_sk_min) AND DynamicValue(RS_18_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_18_d1_d_date_sk_bloom_filter))) + TableScan [TS_5] (rows=575995635 width=355) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_999] - Group By Operator [GBY_998] (rows=1 width=12) + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1006] + Group By Operator [GBY_1005] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_996] - Group By Operator [GBY_994] (rows=1 width=12) + <-Map 36 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1003] + Group By Operator [GBY_1001] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_991] (rows=652 width=4) + Select Operator [SEL_998] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_985] - <-Reducer 34 [SIMPLE_EDGE] vectorized + Please refer to the previous Select Operator [SEL_991] + <-Reducer 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1027] + PartitionCols:_col0 + Select Operator [SEL_1026] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_1025] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1024] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 41 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_950] (rows=183085709 width=227) + Conds:RS_1020._col0, _col1=RS_1022._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1022] + PartitionCols:_col0, _col1 + Select Operator [SEL_1021] (rows=28798881 width=120) + Output:["_col0","_col1","_col2"] + TableScan [TS_23] (rows=28798881 width=337) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1020] + PartitionCols:_col0, _col1 + Select Operator [SEL_1019] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1018] (rows=287989836 width=119) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_15_item_i_item_sk_min) AND DynamicValue(RS_15_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_15_item_i_item_sk_bloom_filter))) + TableScan [TS_21] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 35 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1017] + Group By Operator [GBY_1016] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1015] + Group By Operator [GBY_1014] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1013] (rows=4667 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1011] + <-Reducer 25 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1077] PartitionCols:_col2, _col1, _col3 Select Operator [SEL_1076] (rows=21304422 width=1354) @@ -575,133 +575,133 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17"] Group By Operator [GBY_1073] (rows=21304422 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_191] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_195] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_190] (rows=21304422 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col8)","sum(_col9)","sum(_col10)"],keys:_col24, _col11, _col25, _col12, _col29, _col31, _col37, _col38, _col39, _col40, _col42, _col43, _col44, _col45 + Group By Operator [GBY_194] (rows=21304422 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col16)","sum(_col17)","sum(_col18)"],keys:_col24, _col19, _col25, _col20, _col29, _col31, _col37, _col38, _col39, _col40, _col42, _col43, _col44, _col45 Merge Join Operator [MERGEJOIN_976] (rows=21304422 width=1155) - Conds:RS_186._col17=RS_1047._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] + Conds:RS_190._col5=RS_1047._col0(Inner),Output:["_col16","_col17","_col18","_col19","_col20","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] <-Map 49 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1047] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1043] - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_186] - PartitionCols:_col17 + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_190] + PartitionCols:_col5 Merge Join Operator [MERGEJOIN_975] (rows=21304422 width=798) - Conds:RS_183._col5=RS_1046._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] + Conds:RS_187._col13=RS_1046._col0(Inner),Output:["_col5","_col16","_col17","_col18","_col19","_col20","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] <-Map 49 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1046] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1043] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_183] - PartitionCols:_col5 - Filter Operator [FIL_182] (rows=21304422 width=609) + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_187] + PartitionCols:_col13 + Filter Operator [FIL_186] (rows=21304422 width=609) predicate:(_col33 <> _col35) Merge Join Operator [MERGEJOIN_974] (rows=21304422 width=609) - Conds:RS_179._col15=RS_1042._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col33","_col35"] + Conds:RS_183._col3=RS_1042._col0(Inner),Output:["_col5","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25","_col29","_col31","_col33","_col35"] <-Map 48 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1042] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1038] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_179] - PartitionCols:_col15 + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_183] + PartitionCols:_col3 Merge Join Operator [MERGEJOIN_973] (rows=21007353 width=525) - Conds:RS_176._col3=RS_1041._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31","_col33"] + Conds:RS_180._col11=RS_1041._col0(Inner),Output:["_col3","_col5","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25","_col29","_col31","_col33"] <-Map 48 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1041] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1038] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_176] - PartitionCols:_col3 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_180] + PartitionCols:_col11 Merge Join Operator [MERGEJOIN_972] (rows=20714426 width=438) - Conds:RS_173._col18=RS_988._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_988] + Conds:RS_177._col6=RS_996._col0(Inner),Output:["_col3","_col5","_col11","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25","_col29","_col31"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_996] PartitionCols:_col0 - Select Operator [SEL_982] (rows=73049 width=8) + Select Operator [SEL_990] (rows=73049 width=8) Output:["_col0","_col1"] - Please refer to the previous TableScan [TS_38] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_173] - PartitionCols:_col18 + Please refer to the previous TableScan [TS_11] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_177] + PartitionCols:_col6 Merge Join Operator [MERGEJOIN_971] (rows=20714426 width=438) - Conds:RS_170._col19=RS_987._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col24","_col25","_col29"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_987] + Conds:RS_174._col7=RS_995._col0(Inner),Output:["_col3","_col5","_col6","_col11","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25","_col29"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_995] PartitionCols:_col0 - Select Operator [SEL_981] (rows=73049 width=8) + Select Operator [SEL_989] (rows=73049 width=8) Output:["_col0","_col1"] - Please refer to the previous TableScan [TS_38] - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_170] - PartitionCols:_col19 + Please refer to the previous TableScan [TS_11] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_174] + PartitionCols:_col7 Merge Join Operator [MERGEJOIN_970] (rows=20714426 width=437) - Conds:RS_167._col16=RS_1037._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col19","_col24","_col25"] - <-Map 43 [SIMPLE_EDGE] vectorized + Conds:RS_171._col4=RS_1037._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25"] + <-Map 47 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1037] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1033] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_167] - PartitionCols:_col16 + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_171] + PartitionCols:_col4 Merge Join Operator [MERGEJOIN_969] (rows=20714426 width=441) - Conds:RS_164._col4=RS_1036._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] - <-Map 43 [SIMPLE_EDGE] vectorized + Conds:RS_168._col12=RS_1036._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25"] + <-Map 47 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1036] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1033] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_164] - PartitionCols:_col4 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_168] + PartitionCols:_col12 Merge Join Operator [MERGEJOIN_968] (rows=20714426 width=443) - Conds:RS_161._col6=RS_1031._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] - <-Map 42 [SIMPLE_EDGE] vectorized + Conds:RS_165._col14=RS_1031._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col16","_col17","_col18","_col19","_col20","_col24","_col25"] + <-Map 46 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1031] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_1029] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_161] - PartitionCols:_col6 + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_165] + PartitionCols:_col14 Merge Join Operator [MERGEJOIN_967] (rows=20714426 width=267) - Conds:RS_158._col1, _col7=RS_1027._col0, _col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1027] + Conds:RS_980._col0, _col1=RS_163._col7, _col13(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col14","_col16","_col17","_col18","_col19","_col20"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_980] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1025] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_158] - PartitionCols:_col1, _col7 + Please refer to the previous Select Operator [SEL_978] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_163] + PartitionCols:_col7, _col13 Merge Join Operator [MERGEJOIN_966] (rows=12564038 width=135) - Conds:RS_155._col1=RS_1072._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_155] - PartitionCols:_col1 + Conds:RS_137._col7=RS_1072._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_137] + PartitionCols:_col7 Merge Join Operator [MERGEJOIN_964] (rows=12564038 width=135) - Conds:RS_152._col2=RS_1012._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1012] + Conds:RS_984._col0=RS_135._col2(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_984] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1010] - <-Reducer 46 [SIMPLE_EDGE] - SHUFFLE [RS_152] + Please refer to the previous Select Operator [SEL_982] + <-Reducer 38 [SIMPLE_EDGE] + SHUFFLE [RS_135] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_963] (rows=14487982 width=119) - Conds:RS_149._col0=RS_992._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_992] + Conds:RS_117._col0=RS_999._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_999] PartitionCols:_col0 - Select Operator [SEL_989] (rows=652 width=4) + Select Operator [SEL_994] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_983] (rows=652 width=8) + Filter Operator [FIL_988] (rows=652 width=8) predicate:(d_year = 2000) - Please refer to the previous TableScan [TS_38] + Please refer to the previous TableScan [TS_11] <-Reducer 51 [SIMPLE_EDGE] - SHUFFLE [RS_149] + SHUFFLE [RS_117] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_962] (rows=40575792 width=312) Conds:RS_1057._col1=RS_1060._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] @@ -712,7 +712,7 @@ Stage-0 Output:["_col0","_col1"] Filter Operator [FIL_1058] (rows=4667 width=311) predicate:(i_current_price BETWEEN 36 AND 45 and (i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate')) - TableScan [TS_101] (rows=462000 width=311) + TableScan [TS_108] (rows=462000 width=311) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] <-Map 50 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1057] @@ -720,21 +720,21 @@ Stage-0 Select Operator [SEL_1056] (rows=417313408 width=351) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] Filter Operator [FIL_1055] (rows=417313408 width=355) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_150_d1_d_date_sk_min) AND DynamicValue(RS_150_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_150_d1_d_date_sk_bloom_filter))) - TableScan [TS_98] (rows=575995635 width=355) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_118_d1_d_date_sk_min) AND DynamicValue(RS_118_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_118_d1_d_date_sk_bloom_filter))) + TableScan [TS_105] (rows=575995635 width=355) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 47 [BROADCAST_EDGE] vectorized + <-Reducer 39 [BROADCAST_EDGE] vectorized BROADCAST [RS_1054] Group By Operator [GBY_1053] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_997] - Group By Operator [GBY_995] (rows=1 width=12) + <-Map 36 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1004] + Group By Operator [GBY_1002] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_993] (rows=652 width=4) + Select Operator [SEL_1000] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_989] - <-Reducer 40 [SIMPLE_EDGE] vectorized + Please refer to the previous Select Operator [SEL_994] + <-Reducer 45 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1072] PartitionCols:_col0 Select Operator [SEL_1071] (rows=13257 width=4) @@ -743,25 +743,25 @@ Stage-0 predicate:(_col1 > (2 * _col2)) Group By Operator [GBY_1069] (rows=39773 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 39 [SIMPLE_EDGE] - SHUFFLE [RS_119] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_130] PartitionCols:_col0 - Group By Operator [GBY_118] (rows=6482999 width=228) + Group By Operator [GBY_129] (rows=6482999 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 Merge Join Operator [MERGEJOIN_965] (rows=183085709 width=227) - Conds:RS_1068._col0, _col1=RS_1020._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1020] + Conds:RS_1068._col0, _col1=RS_1023._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1023] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1018] + Please refer to the previous Select Operator [SEL_1021] <-Map 54 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1068] PartitionCols:_col0, _col1 Select Operator [SEL_1067] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] Filter Operator [FIL_1066] (rows=287989836 width=119) - predicate:(cs_item_sk BETWEEN DynamicValue(RS_147_item_i_item_sk_min) AND DynamicValue(RS_147_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_147_item_i_item_sk_bloom_filter))) - TableScan [TS_110] (rows=287989836 width=119) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_115_item_i_item_sk_min) AND DynamicValue(RS_115_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_115_item_i_item_sk_bloom_filter))) + TableScan [TS_121] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] <-Reducer 53 [BROADCAST_EDGE] vectorized BROADCAST [RS_1065] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out index 0bc7aaeb86..ae5f44f5b7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query65.q.out @@ -67,118 +67,120 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 12 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Map 5 <- Reducer 11 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 10 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 9 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_165] - Limit [LIM_164] (rows=100 width=702) + Reducer 4 vectorized + File Output Operator [FS_166] + Limit [LIM_165] (rows=100 width=702) Number of rows:100 - Select Operator [SEL_163] (rows=65392 width=700) + Select Operator [SEL_164] (rows=65392 width=700) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_51] - Select Operator [SEL_50] (rows=65392 width=700) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_52] + Select Operator [SEL_51] (rows=65392 width=700) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_80] (rows=65392 width=704) - keys:_col6, _col8,top n:100 - Merge Join Operator [MERGEJOIN_138] (rows=65392 width=704) - Conds:RS_47._col1=RS_162._col0(Inner),Output:["_col2","_col6","_col8","_col9","_col10","_col11"] + Top N Key Operator [TNK_81] (rows=65392 width=704) + keys:_col1, _col8,top n:100 + Merge Join Operator [MERGEJOIN_139] (rows=65392 width=704) + Conds:RS_48._col3=RS_163._col0(Inner),Output:["_col1","_col4","_col8","_col9","_col10","_col11"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_161] (rows=462000 width=511) + Select Operator [SEL_162] (rows=462000 width=511) Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_38] (rows=462000 width=511) + TableScan [TS_43] (rows=462000 width=511) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_47] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_137] (rows=65392 width=204) - Conds:RS_44._col0=RS_160._col0(Inner),Output:["_col1","_col2","_col6"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_138] (rows=65392 width=204) + Conds:RS_141._col0=RS_46._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_159] (rows=1704 width=92) + Select Operator [SEL_140] (rows=1704 width=92) Output:["_col0","_col1"] - TableScan [TS_36] (rows=1704 width=92) + TableScan [TS_0] (rows=1704 width=92) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col0 - Filter Operator [FIL_43] (rows=65392 width=231) - predicate:(_col2 <= _col4) - Merge Join Operator [MERGEJOIN_136] (rows=196176 width=231) - Conds:RS_152._col0=RS_158._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] - PartitionCols:_col0 - Filter Operator [FIL_151] (rows=184637 width=118) - predicate:_col2 is not null - Group By Operator [GBY_150] (rows=184637 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=6093021 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_134] (rows=91197860 width=89) - Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] - PartitionCols:_col0 - Select Operator [SEL_140] (rows=317 width=4) - Output:["_col0"] - Filter Operator [FIL_139] (rows=317 width=8) - predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] - PartitionCols:_col0 - Select Operator [SEL_148] (rows=525329897 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_147] (rows=525329897 width=118) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_146] - Group By Operator [GBY_145] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_142] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_140] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] - PartitionCols:_col0 - Select Operator [SEL_157] (rows=17 width=115) - Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=17 width=123) - predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_155] (rows=17 width=123) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_154] (rows=184637 width=118) - Output:["_col1","_col2"] - Group By Operator [GBY_153] (rows=184637 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_10] + Select Operator [SEL_42] (rows=65392 width=231) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_41] (rows=65392 width=231) + predicate:(_col2 <= _col4) + Merge Join Operator [MERGEJOIN_137] (rows=196176 width=231) + Conds:RS_155._col0=RS_161._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] + PartitionCols:_col0 + Filter Operator [FIL_154] (rows=184637 width=118) + predicate:_col2 is not null + Group By Operator [GBY_153] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0, _col1 + Group By Operator [GBY_12] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_135] (rows=91197860 width=89) + Conds:RS_152._col0=RS_144._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_142] (rows=317 width=8) + predicate:d_month_seq BETWEEN 1212 AND 1223 + TableScan [TS_5] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col0 + Select Operator [SEL_151] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_150] (rows=525329897 width=118) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_149] + Group By Operator [GBY_148] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + Group By Operator [GBY_146] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_145] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_143] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_161] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=17 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=17 width=123) + predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null + Group By Operator [GBY_158] (rows=17 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_157] (rows=184637 width=118) + Output:["_col1","_col2"] + Group By Operator [GBY_156] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Please refer to the previous Group By Operator [GBY_12] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out index 2917b8cd9c..8c2c2da384 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out @@ -457,198 +457,198 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 18 (BROADCAST_EDGE) -Map 21 <- Reducer 19 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 17 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 20 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Map 21 <- Reducer 20 (BROADCAST_EDGE) +Map 9 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 17 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 17 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 18 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_256] - Select Operator [SEL_255] (rows=100 width=4614) + Reducer 6 vectorized + File Output Operator [FS_257] + Select Operator [SEL_256] (rows=100 width=4614) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"] - Limit [LIM_254] (rows=100 width=4510) + Limit [LIM_255] (rows=100 width=4510) Number of rows:100 - Select Operator [SEL_253] (rows=2423925 width=4510) + Select Operator [SEL_254] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_252] - Group By Operator [GBY_251] (rows=2423925 width=4510) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] + Group By Operator [GBY_252] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] vectorized - Reduce Output Operator [RS_266] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_251] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_265] (rows=2513727 width=4510) + Group By Operator [GBY_250] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_264] (rows=2513727 width=3166) + Select Operator [SEL_249] (rows=2513727 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Top N Key Operator [TNK_263] (rows=2513727 width=3166) + Top N Key Operator [TNK_248] (rows=2513727 width=3166) keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Group By Operator [GBY_262] (rows=2513700 width=3166) + Group By Operator [GBY_247] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_61] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_60] (rows=12905590 width=3166) + Group By Operator [GBY_29] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_58] (rows=12905590 width=750) + Select Operator [SEL_27] (rows=6624114 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_207] (rows=12905590 width=750) - Conds:RS_55._col3=RS_245._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] + Merge Join Operator [MERGEJOIN_204] (rows=6624114 width=750) + Conds:RS_222._col0=RS_25._col3(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_243] (rows=27 width=482) + Select Operator [SEL_221] (rows=27 width=482) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - TableScan [TS_12] (rows=27 width=482) + TableScan [TS_0] (rows=27 width=482) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_55] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_206] (rows=12905590 width=275) - Conds:RS_52._col2=RS_224._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] + Merge Join Operator [MERGEJOIN_203] (rows=6624114 width=275) + Conds:RS_20._col2=RS_226._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_226] PartitionCols:_col0 - Select Operator [SEL_221] (rows=1 width=4) + Select Operator [SEL_225] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_220] (rows=1 width=88) + Filter Operator [FIL_224] (rows=1 width=88) predicate:(sm_carrier) IN ('DIAMOND', 'AIRBORNE') - TableScan [TS_9] (rows=1 width=88) + TableScan [TS_11] (rows=1 width=88) default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_52] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_205] (rows=38716771 width=279) - Conds:RS_49._col0=RS_242._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + Merge Join Operator [MERGEJOIN_202] (rows=19872342 width=279) + Conds:RS_17._col0=RS_245._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] PartitionCols:_col0 - Select Operator [SEL_240] (rows=652 width=52) + Select Operator [SEL_244] (rows=652 width=52) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_239] (rows=652 width=12) + Filter Operator [FIL_243] (rows=652 width=12) predicate:(d_year = 2002) - TableScan [TS_6] (rows=73049 width=12) + TableScan [TS_8] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_204] (rows=109204159 width=235) - Conds:RS_261._col1=RS_238._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] + Merge Join Operator [MERGEJOIN_201] (rows=55655511 width=235) + Conds:RS_238._col1=RS_241._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_236] (rows=33426 width=4) + Select Operator [SEL_240] (rows=33426 width=4) Output:["_col0"] - Filter Operator [FIL_235] (rows=33426 width=8) + Filter Operator [FIL_239] (rows=33426 width=8) predicate:t_time BETWEEN 49530 AND 78330 - TableScan [TS_3] (rows=86400 width=8) + TableScan [TS_5] (rows=86400 width=8) default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_261] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] PartitionCols:_col1 - Select Operator [SEL_260] (rows=282272460 width=239) + Select Operator [SEL_237] (rows=143859154 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_259] (rows=282272460 width=243) - predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_53_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_53_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_53_ship_mode_sm_ship_mode_sk_bloom_filter))) - TableScan [TS_32] (rows=287989836 width=243) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + Filter Operator [FIL_236] (rows=143859154 width=243) + predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_bloom_filter))) + TableScan [TS_2] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_258] - Group By Operator [GBY_257] (rows=1 width=12) + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] - Group By Operator [GBY_227] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_232] + Group By Operator [GBY_230] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_225] (rows=1 width=4) + Select Operator [SEL_227] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_221] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_250] + Please refer to the previous Select Operator [SEL_225] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_267] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_249] (rows=2513727 width=4510) + Group By Operator [GBY_266] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_248] (rows=2513727 width=3166) + Select Operator [SEL_265] (rows=2513727 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Top N Key Operator [TNK_247] (rows=2513727 width=3166) + Top N Key Operator [TNK_264] (rows=2513727 width=3166) keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Group By Operator [GBY_246] (rows=27 width=3166) + Group By Operator [GBY_263] (rows=2513700 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_62] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_28] (rows=27 width=3166) + Group By Operator [GBY_61] (rows=12905590 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_26] (rows=6624114 width=750) + Select Operator [SEL_59] (rows=12905590 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_203] (rows=6624114 width=750) - Conds:RS_23._col3=RS_244._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] + Merge Join Operator [MERGEJOIN_208] (rows=12905590 width=750) + Conds:RS_56._col3=RS_223._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_243] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + Please refer to the previous Select Operator [SEL_221] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_202] (rows=6624114 width=275) - Conds:RS_20._col2=RS_222._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] + Merge Join Operator [MERGEJOIN_207] (rows=12905590 width=275) + Conds:RS_53._col2=RS_228._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_228] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_221] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_20] + Please refer to the previous Select Operator [SEL_225] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_201] (rows=19872342 width=279) - Conds:RS_17._col0=RS_241._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] + Merge Join Operator [MERGEJOIN_206] (rows=38716771 width=279) + Conds:RS_50._col0=RS_246._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_240] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_17] + Please refer to the previous Select Operator [SEL_244] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_50] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_200] (rows=55655511 width=235) - Conds:RS_234._col1=RS_237._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] + Merge Join Operator [MERGEJOIN_205] (rows=109204159 width=235) + Conds:RS_262._col1=RS_242._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_236] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + Please refer to the previous Select Operator [SEL_240] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_262] PartitionCols:_col1 - Select Operator [SEL_233] (rows=143859154 width=239) + Select Operator [SEL_261] (rows=282272460 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_232] (rows=143859154 width=243) - predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=243) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) + Filter Operator [FIL_260] (rows=282272460 width=243) + predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_54_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_54_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_54_ship_mode_sm_ship_mode_sk_bloom_filter))) + TableScan [TS_33] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_259] + Group By Operator [GBY_258] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - Group By Operator [GBY_226] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_233] + Group By Operator [GBY_231] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=1 width=4) + Select Operator [SEL_229] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_221] + Please refer to the previous Select Operator [SEL_225] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out index 018d316cdb..845f606c96 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query68.q.out @@ -113,120 +113,120 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_173] - Limit [LIM_172] (rows=100 width=706) + File Output Operator [FS_174] + Limit [LIM_173] (rows=100 width=706) Number of rows:100 - Select Operator [SEL_171] (rows=727776 width=706) + Select Operator [SEL_172] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_44] - Select Operator [SEL_43] (rows=727776 width=706) + SHUFFLE [RS_45] + Select Operator [SEL_44] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_78] (rows=727776 width=706) + Top N Key Operator [TNK_79] (rows=727776 width=706) keys:_col3, _col6,top n:100 - Filter Operator [FIL_42] (rows=727776 width=706) + Filter Operator [FIL_43] (rows=727776 width=706) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_144] (rows=727776 width=706) - Conds:RS_39._col0=RS_170._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_145] (rows=727776 width=706) + Conds:RS_40._col0=RS_171._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_39] + SHUFFLE [RS_40] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_139] (rows=80000000 width=277) - Conds:RS_147._col1=RS_149._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_140] (rows=80000000 width=277) + Conds:RS_148._col1=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_148] (rows=40000000 width=97) + Select Operator [SEL_149] (rows=40000000 width=97) Output:["_col0","_col1"] TableScan [TS_3] (rows=40000000 width=97) default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + SHUFFLE [RS_148] PartitionCols:_col1 - Select Operator [SEL_146] (rows=80000000 width=188) + Select Operator [SEL_147] (rows=80000000 width=188) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_145] (rows=80000000 width=188) + Filter Operator [FIL_146] (rows=80000000 width=188) predicate:c_current_addr_sk is not null TableScan [TS_0] (rows=80000000 width=188) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + SHUFFLE [RS_171] PartitionCols:_col1 - Select Operator [SEL_169] (rows=727776 width=433) + Select Operator [SEL_170] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_168] (rows=727776 width=433) + Group By Operator [GBY_169] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_33] + SHUFFLE [RS_34] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_32] (rows=727776 width=433) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col1, _col13, _col3, _col5 - Merge Join Operator [MERGEJOIN_143] (rows=727776 width=97) - Conds:RS_28._col3=RS_150._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col13"] + Group By Operator [GBY_33] (rows=727776 width=433) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col8)","sum(_col9)","sum(_col10)"],keys:_col3, _col1, _col5, _col7 + Merge Join Operator [MERGEJOIN_144] (rows=727776 width=97) + Conds:RS_151._col0=RS_30._col3(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col9","_col10"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + SHUFFLE [RS_151] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_148] + Please refer to the previous Select Operator [SEL_149] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_28] + SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_142] (rows=727776 width=4) - Conds:RS_25._col2=RS_167._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_143] (rows=727776 width=4) + Conds:RS_25._col2=RS_168._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_166] (rows=1855 width=4) + Select Operator [SEL_167] (rows=1855 width=4) Output:["_col0"] - Filter Operator [FIL_165] (rows=1855 width=12) + Filter Operator [FIL_166] (rows=1855 width=12) predicate:((hd_vehicle_count = 1) or (hd_dep_count = 2)) - TableScan [TS_14] (rows=7200 width=12) + TableScan [TS_16] (rows=7200 width=12) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_141] (rows=2824787 width=4) - Conds:RS_22._col4=RS_164._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_142] (rows=2824787 width=4) + Conds:RS_22._col4=RS_165._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_163] (rows=14 width=4) + Select Operator [SEL_164] (rows=14 width=4) Output:["_col0"] - Filter Operator [FIL_162] (rows=14 width=97) + Filter Operator [FIL_163] (rows=14 width=97) predicate:(s_city) IN ('Cedar Grove', 'Wildwood') - TableScan [TS_11] (rows=1704 width=97) + TableScan [TS_13] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_140] (rows=42598570 width=185) - Conds:RS_161._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_141] (rows=42598570 width=185) + Conds:RS_162._col0=RS_154._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_152] (rows=170 width=4) + Select Operator [SEL_153] (rows=170 width=4) Output:["_col0"] - Filter Operator [FIL_151] (rows=170 width=12) + Filter Operator [FIL_152] (rows=170 width=12) predicate:(d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000)) - TableScan [TS_8] (rows=73049 width=12) + TableScan [TS_10] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_161] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_160] (rows=457565061 width=343) + Select Operator [SEL_161] (rows=457565061 width=343) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_159] (rows=457565061 width=343) + Filter Operator [FIL_160] (rows=457565061 width=343) predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_5] (rows=575995635 width=343) + TableScan [TS_7] (rows=575995635 width=343) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_ext_sales_price","ss_ext_list_price","ss_ext_tax"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) + BROADCAST [RS_159] + Group By Operator [GBY_158] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - Group By Operator [GBY_155] (rows=1 width=12) + SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_154] (rows=170 width=4) + Select Operator [SEL_155] (rows=170 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_152] + Please refer to the previous Select Operator [SEL_153] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out index 9dc4baa9fd..8a19ae0bd7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out @@ -53,107 +53,107 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_128] - Limit [LIM_127] (rows=100 width=444) + Reducer 4 vectorized + File Output Operator [FS_129] + Limit [LIM_128] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_126] (rows=310774 width=444) + Select Operator [SEL_127] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] - Select Operator [SEL_124] (rows=310774 width=444) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + Select Operator [SEL_125] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_123] (rows=310774 width=476) + Group By Operator [GBY_124] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_28] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Group By Operator [GBY_27] (rows=462000 width=476) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_57] (rows=1441769 width=100) - keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_103] (rows=1441769 width=100) - Conds:RS_23._col1=RS_122._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] + Group By Operator [GBY_28] (rows=462000 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col9)","count(_col9)","sum(_col8)","count(_col8)"],keys:_col1 + Top N Key Operator [TNK_58] (rows=1441769 width=100) + keys:_col1,top n:100 + Merge Join Operator [MERGEJOIN_104] (rows=1441769 width=100) + Conds:RS_106._col0=RS_25._col1(Inner),Output:["_col1","_col6","_col7","_col8","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_121] (rows=462000 width=104) + Select Operator [SEL_105] (rows=462000 width=104) Output:["_col0","_col1"] - TableScan [TS_12] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_102] (rows=1441769 width=4) - Conds:RS_20._col3=RS_120._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + Merge Join Operator [MERGEJOIN_103] (rows=1441769 width=4) + Conds:RS_20._col3=RS_123._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_119] (rows=2300 width=4) + Select Operator [SEL_122] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_118] (rows=2300 width=174) + Filter Operator [FIL_121] (rows=2300 width=174) predicate:((p_channel_email = 'N') or (p_channel_event = 'N')) - TableScan [TS_9] (rows=2300 width=174) + TableScan [TS_11] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_101] (rows=1441769 width=4) - Conds:RS_17._col0=RS_117._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_102] (rows=1441769 width=4) + Conds:RS_17._col0=RS_120._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_116] (rows=652 width=4) + Select Operator [SEL_119] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_115] (rows=652 width=8) + Filter Operator [FIL_118] (rows=652 width=8) predicate:(d_year = 1998) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_8] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_100] (rows=4037893 width=4) - Conds:RS_114._col2=RS_106._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] + Merge Join Operator [MERGEJOIN_101] (rows=4037893 width=4) + Conds:RS_117._col2=RS_109._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] PartitionCols:_col0 - Select Operator [SEL_105] (rows=14776 width=4) + Select Operator [SEL_108] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_104] (rows=14776 width=268) + Filter Operator [FIL_107] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F')) - TableScan [TS_3] (rows=1861800 width=268) + TableScan [TS_5] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_117] PartitionCols:_col2 - Select Operator [SEL_113] (rows=501686735 width=340) + Select Operator [SEL_116] (rows=501686735 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_112] (rows=501686735 width=340) + Filter Operator [FIL_115] (rows=501686735 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_15_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_15_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_15_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=340) + TableScan [TS_2] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_109] - Group By Operator [GBY_108] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_107] (rows=14776 width=4) + Select Operator [SEL_110] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_105] + Please refer to the previous Select Operator [SEL_108] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out index 83065fd2a2..77c8f7f194 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query71.q.out @@ -91,171 +91,171 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 10 <- Reducer 13 (BROADCAST_EDGE) -Map 14 <- Reducer 17 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 4 <- Map 18 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) -Reducer 5 <- Map 19 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Map 15 <- Reducer 18 (BROADCAST_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Map 19 (SIMPLE_EDGE), Union 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_174] - Select Operator [SEL_173] (rows=1991967 width=223) + Reducer 4 vectorized + File Output Operator [FS_164] + Select Operator [SEL_163] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] - Select Operator [SEL_171] (rows=1991967 width=227) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_162] + Select Operator [SEL_161] (rows=1991967 width=227) Output:["_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_170] (rows=1991967 width=223) + Group By Operator [GBY_160] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_46] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_45] (rows=1991967 width=223) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col0)"],keys:_col4, _col7, _col8, _col5 - Merge Join Operator [MERGEJOIN_140] (rows=1991967 width=112) - Conds:RS_41._col2=RS_169._col0(Inner),Output:["_col0","_col4","_col5","_col7","_col8"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] + Group By Operator [GBY_46] (rows=1991967 width=223) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)"],keys:_col7, _col1, _col2, _col8 + Merge Join Operator [MERGEJOIN_141] (rows=1991967 width=112) + Conds:RS_156._col0=RS_43._col2(Inner),Output:["_col1","_col2","_col3","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_168] (rows=43200 width=12) + Select Operator [SEL_155] (rows=43200 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_167] (rows=43200 width=99) + Filter Operator [FIL_154] (rows=43200 width=99) predicate:(t_meal_time) IN ('breakfast', 'dinner') - TableScan [TS_35] (rows=86400 width=99) + TableScan [TS_0] (rows=86400 width=99) default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_41] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_139] (rows=3983933 width=104) - Conds:Union 3._col1=RS_166._col0(Inner),Output:["_col0","_col2","_col4","_col5"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] + Merge Join Operator [MERGEJOIN_140] (rows=3983933 width=104) + Conds:Union 7._col1=RS_159._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_165] (rows=7333 width=107) + Select Operator [SEL_158] (rows=7333 width=107) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_164] (rows=7333 width=111) + Filter Operator [FIL_157] (rows=7333 width=111) predicate:(i_manager_id = 1) - TableScan [TS_32] (rows=462000 width=111) + TableScan [TS_35] (rows=462000 width=111) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] - <-Union 3 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] - Reduce Output Operator [RS_148] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] + Reduce Output Operator [RS_149] PartitionCols:_col1 - Select Operator [SEL_146] (rows=7751851 width=98) + Select Operator [SEL_147] (rows=7751851 width=98) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_145] (rows=7751851 width=98) - Conds:RS_185._col0=RS_177._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_177] + Merge Join Operator [MERGEJOIN_146] (rows=7751851 width=98) + Conds:RS_186._col0=RS_178._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_178] PartitionCols:_col0 - Select Operator [SEL_176] (rows=50 width=4) + Select Operator [SEL_177] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_175] (rows=50 width=12) + Filter Operator [FIL_176] (rows=50 width=12) predicate:((d_year = 2001) and (d_moy = 12)) - TableScan [TS_13] (rows=73049 width=12) + TableScan [TS_16] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] PartitionCols:_col0 - Select Operator [SEL_184] (rows=285116947 width=123) + Select Operator [SEL_185] (rows=285116947 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_183] (rows=285116947 width=123) - predicate:(cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=287989836 width=123) + Filter Operator [FIL_184] (rows=285116947 width=123) + predicate:(cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_13] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_182] - Group By Operator [GBY_181] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_180] - Group By Operator [GBY_179] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_181] + Group By Operator [GBY_180] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_178] (rows=50 width=4) + Select Operator [SEL_179] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_176] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_152] + Please refer to the previous Select Operator [SEL_177] + <-Reducer 16 [CONTAINS] + Reduce Output Operator [RS_153] PartitionCols:_col1 - Select Operator [SEL_150] (rows=14384397 width=4) + Select Operator [SEL_151] (rows=14384397 width=4) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_149] (rows=14384397 width=4) - Conds:RS_196._col0=RS_188._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] + Merge Join Operator [MERGEJOIN_150] (rows=14384397 width=4) + Conds:RS_197._col0=RS_189._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_187] (rows=50 width=4) + Select Operator [SEL_188] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_186] (rows=50 width=12) + Filter Operator [FIL_187] (rows=50 width=12) predicate:((d_year = 2001) and (d_moy = 12)) - TableScan [TS_24] (rows=73049 width=12) + TableScan [TS_27] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] PartitionCols:_col0 - Select Operator [SEL_195] (rows=525325345 width=118) + Select Operator [SEL_196] (rows=525325345 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_194] (rows=525325345 width=118) - predicate:(ss_sold_date_sk is not null and ss_sold_time_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_21] (rows=575995635 width=118) + Filter Operator [FIL_195] (rows=525325345 width=118) + predicate:(ss_sold_date_sk is not null and ss_sold_time_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_193] - Group By Operator [GBY_192] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_194] + Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_191] - Group By Operator [GBY_190] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_192] + Group By Operator [GBY_191] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_189] (rows=50 width=4) + Select Operator [SEL_190] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_187] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_144] + Please refer to the previous Select Operator [SEL_188] + <-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_145] PartitionCols:_col1 - Select Operator [SEL_142] (rows=3941098 width=118) + Select Operator [SEL_143] (rows=3941098 width=118) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_141] (rows=3941098 width=118) - Conds:RS_163._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] + Merge Join Operator [MERGEJOIN_142] (rows=3941098 width=118) + Conds:RS_175._col0=RS_167._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_167] PartitionCols:_col0 - Select Operator [SEL_154] (rows=50 width=4) + Select Operator [SEL_166] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_153] (rows=50 width=12) + Filter Operator [FIL_165] (rows=50 width=12) predicate:((d_year = 2001) and (d_moy = 12)) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_175] PartitionCols:_col0 - Select Operator [SEL_162] (rows=143930836 width=123) + Select Operator [SEL_174] (rows=143930836 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_161] (rows=143930836 width=123) - predicate:(ws_sold_time_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=123) + Filter Operator [FIL_173] (rows=143930836 width=123) + predicate:(ws_sold_time_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_160] - Group By Operator [GBY_159] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_156] (rows=50 width=4) + Select Operator [SEL_168] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_154] + Please refer to the previous Select Operator [SEL_166] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out index b5fc39d98f..c7b6d80a85 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out @@ -81,197 +81,197 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 24 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 19 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 22 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 23 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Map 10 <- Reducer 18 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 24 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 12 vectorized - File Output Operator [FS_285] - Limit [LIM_284] (rows=100 width=312) + Reducer 9 vectorized + File Output Operator [FS_286] + Limit [LIM_285] (rows=100 width=312) Number of rows:100 - Select Operator [SEL_283] (rows=182953402 width=312) + Select Operator [SEL_284] (rows=182953402 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] - Top N Key Operator [TNK_281] (rows=182953402 width=312) + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_283] + Top N Key Operator [TNK_282] (rows=182953402 width=312) keys:_col5, _col0, _col1, _col2,top n:100 - Group By Operator [GBY_280] (rows=182953402 width=312) + Group By Operator [GBY_281] (rows=182953402 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_66] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_65] (rows=182953402 width=312) + Group By Operator [GBY_66] (rows=182953402 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_63] (rows=471834849 width=292) + Select Operator [SEL_64] (rows=471834849 width=292) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_248] (rows=471834849 width=292) - Conds:RS_60._col4, _col6=RS_279._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] + Merge Join Operator [MERGEJOIN_249] (rows=471834849 width=292) + Conds:RS_61._col4, _col6=RS_280._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] + SHUFFLE [RS_280] PartitionCols:_col0, _col1 - Select Operator [SEL_278] (rows=28798881 width=8) + Select Operator [SEL_279] (rows=28798881 width=8) Output:["_col0","_col1"] - TableScan [TS_58] (rows=28798881 width=8) + TableScan [TS_59] (rows=28798881 width=8) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_60] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_61] PartitionCols:_col4, _col6 - Select Operator [SEL_57] (rows=182953402 width=300) + Select Operator [SEL_58] (rows=182953402 width=300) Output:["_col4","_col6","_col13","_col15","_col19","_col25"] - Merge Join Operator [MERGEJOIN_247] (rows=182953402 width=300) - Conds:RS_54._col4=RS_277._col0(Inner),Output:["_col4","_col6","_col13","_col20","_col21","_col25"] + Merge Join Operator [MERGEJOIN_248] (rows=182953402 width=300) + Conds:RS_55._col8=RS_278._col0(Inner),Output:["_col8","_col10","_col17","_col20","_col21","_col25"] <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_277] + SHUFFLE [RS_278] PartitionCols:_col0 - Select Operator [SEL_276] (rows=462000 width=188) + Select Operator [SEL_277] (rows=462000 width=188) Output:["_col0","_col1"] - TableScan [TS_29] (rows=462000 width=188) + TableScan [TS_39] (rows=462000 width=188) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col4 - Filter Operator [FIL_53] (rows=182953402 width=132) - predicate:(_col23 > _col14) - Merge Join Operator [MERGEJOIN_246] (rows=548860207 width=132) - Conds:RS_50._col1=RS_275._col0(Inner),Output:["_col4","_col6","_col13","_col14","_col20","_col21","_col23"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col8 + Filter Operator [FIL_54] (rows=182953402 width=132) + predicate:(_col23 > _col18) + Merge Join Operator [MERGEJOIN_247] (rows=548860207 width=132) + Conds:RS_51._col5=RS_276._col0(Inner),Output:["_col8","_col10","_col17","_col18","_col20","_col21","_col23"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_275] + SHUFFLE [RS_276] PartitionCols:_col0 - Select Operator [SEL_274] (rows=73049 width=12) + Select Operator [SEL_275] (rows=73049 width=12) Output:["_col0","_col1"] - Filter Operator [FIL_273] (rows=73049 width=98) + Filter Operator [FIL_274] (rows=73049 width=98) predicate:UDFToDouble(d_date) is not null - TableScan [TS_26] (rows=73049 width=98) + TableScan [TS_36] (rows=73049 width=98) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_245] (rows=548860207 width=127) - Conds:RS_47._col5=RS_272._col0(Left Outer),Output:["_col1","_col4","_col6","_col13","_col14","_col20","_col21"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_246] (rows=548860207 width=127) + Conds:RS_48._col9=RS_273._col0(Left Outer),Output:["_col5","_col8","_col10","_col17","_col18","_col20","_col21"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_272] + SHUFFLE [RS_273] PartitionCols:_col0 - Select Operator [SEL_271] (rows=2300 width=4) + Select Operator [SEL_272] (rows=2300 width=4) Output:["_col0"] - TableScan [TS_24] (rows=2300 width=4) + TableScan [TS_34] (rows=2300 width=4) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_47] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_244] (rows=548860207 width=127) - Conds:RS_44._col17=RS_270._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col13","_col14","_col20"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_245] (rows=548860207 width=127) + Conds:RS_45._col2=RS_271._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col17","_col18","_col20"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_270] + SHUFFLE [RS_271] PartitionCols:_col0 - Select Operator [SEL_269] (rows=27 width=104) + Select Operator [SEL_270] (rows=27 width=104) Output:["_col0","_col1"] - TableScan [TS_22] (rows=27 width=104) + TableScan [TS_32] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col17 - Filter Operator [FIL_43] (rows=548860207 width=39) - predicate:(_col18 < _col7) - Merge Join Operator [MERGEJOIN_243] (rows=1646580622 width=39) - Conds:RS_40._col10, _col4=RS_268._col0, _col1(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col13","_col14","_col17","_col18"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_268] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col2 + Filter Operator [FIL_44] (rows=548860207 width=39) + predicate:(_col3 < _col11) + Merge Join Operator [MERGEJOIN_244] (rows=1646580622 width=39) + Conds:RS_252._col0, _col1=RS_42._col10, _col4(Inner),Output:["_col2","_col3","_col5","_col8","_col9","_col10","_col11","_col17","_col18"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_252] PartitionCols:_col0, _col1 - Select Operator [SEL_267] (rows=35703276 width=15) + Select Operator [SEL_251] (rows=35703276 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_266] (rows=35703276 width=15) + Filter Operator [FIL_250] (rows=35703276 width=15) predicate:inv_quantity_on_hand is not null - TableScan [TS_19] (rows=37584000 width=15) + TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col10, _col4 - Merge Join Operator [MERGEJOIN_242] (rows=2885264 width=30) - Conds:RS_37._col0=RS_38._col2(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col10","_col13","_col14"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_38] + Merge Join Operator [MERGEJOIN_243] (rows=2885264 width=30) + Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col10","_col13","_col14"] + <-Reducer 17 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_241] (rows=3621 width=20) - Conds:RS_251._col1=RS_254._col1(Inner),Output:["_col0","_col2","_col3","_col4"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + Merge Join Operator [MERGEJOIN_242] (rows=3621 width=20) + Conds:RS_255._col1=RS_258._col1(Inner),Output:["_col0","_col2","_col3","_col4"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] PartitionCols:_col1 - Select Operator [SEL_250] (rows=73049 width=8) + Select Operator [SEL_254] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_249] (rows=73049 width=8) + Filter Operator [FIL_253] (rows=73049 width=8) predicate:d_week_seq is not null - TableScan [TS_9] (rows=73049 width=8) + TableScan [TS_12] (rows=73049 width=8) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_258] PartitionCols:_col1 - Select Operator [SEL_253] (rows=652 width=16) + Select Operator [SEL_257] (rows=652 width=16) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_252] (rows=652 width=106) + Filter Operator [FIL_256] (rows=652 width=106) predicate:((d_year = 2001) and d_week_seq is not null and UDFToDouble(d_date) is not null) - TableScan [TS_12] (rows=73049 width=106) + TableScan [TS_15] (rows=73049 width=106) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_37] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_240] (rows=8138146 width=21) - Conds:RS_34._col3=RS_265._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + Merge Join Operator [MERGEJOIN_241] (rows=8138146 width=21) + Conds:RS_25._col3=RS_269._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_269] PartitionCols:_col0 - Select Operator [SEL_264] (rows=1440 width=4) + Select Operator [SEL_268] (rows=1440 width=4) Output:["_col0"] - Filter Operator [FIL_263] (rows=1440 width=96) + Filter Operator [FIL_267] (rows=1440 width=96) predicate:(hd_buy_potential = '1001-5000') - TableScan [TS_6] (rows=7200 width=96) + TableScan [TS_9] (rows=7200 width=96) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_239] (rows=40690727 width=27) - Conds:RS_259._col2=RS_262._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + Merge Join Operator [MERGEJOIN_240] (rows=40690727 width=27) + Conds:RS_263._col2=RS_266._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_263] PartitionCols:_col2 - Select Operator [SEL_258] (rows=280863798 width=31) + Select Operator [SEL_262] (rows=280863798 width=31) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_257] (rows=280863798 width=31) - predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_ship_date_sk is not null and cs_quantity is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_38_d1_d_date_sk_min) AND DynamicValue(RS_38_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=31) + Filter Operator [FIL_261] (rows=280863798 width=31) + predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_ship_date_sk is not null and cs_quantity is not null and cs_bill_hdemo_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=287989836 width=31) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_260] + Group By Operator [GBY_259] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_164] + Group By Operator [GBY_163] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=3621 width=8) + Select Operator [SEL_162] (rows=3621 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_241] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_262] + Please refer to the previous Merge Join Operator [MERGEJOIN_242] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_266] PartitionCols:_col0 - Select Operator [SEL_261] (rows=265971 width=4) + Select Operator [SEL_265] (rows=265971 width=4) Output:["_col0"] - Filter Operator [FIL_260] (rows=265971 width=89) + Filter Operator [FIL_264] (rows=265971 width=89) predicate:(cd_marital_status = 'M') - TableScan [TS_3] (rows=1861800 width=89) + TableScan [TS_6] (rows=1861800 width=89) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out index 31d1fee130..da8e0fa1a7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out @@ -131,269 +131,269 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 24 (BROADCAST_EDGE) -Map 13 <- Reducer 23 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 9 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 25 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 21 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 16 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_349] - Limit [LIM_348] (rows=100 width=280) + Reducer 5 vectorized + File Output Operator [FS_355] + Limit [LIM_354] (rows=100 width=280) Number of rows:100 - Select Operator [SEL_347] (rows=12248094 width=280) + Select Operator [SEL_353] (rows=12248094 width=280) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_89] - Select Operator [SEL_88] (rows=12248094 width=280) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_95] + Select Operator [SEL_94] (rows=12248094 width=280) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_154] (rows=12248094 width=732) - keys:_col8, _col7, _col9,top n:100 - Filter Operator [FIL_87] (rows=12248094 width=732) - predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col2) THEN (((_col6 / _col1) > (_col10 / _col4))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_284] (rows=24496188 width=732) - Conds:RS_84._col3=RS_346._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] + Top N Key Operator [TNK_160] (rows=12248094 width=732) + keys:_col1, _col0, _col2,top n:100 + Filter Operator [FIL_93] (rows=12248094 width=732) + predicate:CASE WHEN (_col10 is not null) THEN (CASE WHEN (_col8) THEN (((_col5 / _col7) > (_col3 / _col10))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_290] (rows=24496188 width=732) + Conds:RS_324._col0=RS_91._col5(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col7","_col8","_col10"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col0 - Group By Operator [GBY_345] (rows=80000000 width=392) + Group By Operator [GBY_323] (rows=80000000 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_75] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_74] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 + Group By Operator [GBY_16] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=377) - Conds:RS_70._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] + Conds:RS_292._col0=RS_13._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_312] (rows=80000000 width=284) + Select Operator [SEL_291] (rows=80000000 width=284) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_65] (rows=80000000 width=284) + TableScan [TS_0] (rows=80000000 width=284) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=101) - Conds:RS_344._col0=RS_291._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_291] + Conds:RS_322._col0=RS_302._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_302] PartitionCols:_col0 - Select Operator [SEL_288] (rows=652 width=4) + Select Operator [SEL_299] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_285] (rows=652 width=8) + Filter Operator [FIL_296] (rows=652 width=8) predicate:((d_year = 2002) and (d_year) IN (2001, 2002)) - TableScan [TS_62] (rows=73049 width=8) + TableScan [TS_5] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] PartitionCols:_col0 - Select Operator [SEL_343] (rows=525327388 width=114) + Select Operator [SEL_321] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=525327388 width=114) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_68_date_dim_d_date_sk_min) AND DynamicValue(RS_68_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_59] (rows=575995635 width=114) + Filter Operator [FIL_320] (rows=525327388 width=114) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_319] + Group By Operator [GBY_318] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_303] - Group By Operator [GBY_299] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_314] + Group By Operator [GBY_310] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_292] (rows=652 width=4) + Select Operator [SEL_303] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_84] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_283] (rows=20485012 width=440) - Conds:RS_81._col3=RS_339._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] - PartitionCols:_col0 - Select Operator [SEL_338] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_337] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_55] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=391) - Conds:RS_51._col1=RS_314._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) - Conds:RS_336._col0=RS_293._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_293] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_288] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] - PartitionCols:_col0 - Select Operator [SEL_335] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_334] (rows=143930993 width=119) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=144002668 width=119) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_294] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_288] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_81] + Please refer to the previous Select Operator [SEL_299] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_91] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_289] (rows=20485012 width=440) + Conds:RS_332._col0=RS_87._col3(Inner),Output:["_col1","_col3","_col4","_col5","_col6"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_87] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_282] (rows=17130654 width=328) - Conds:RS_321._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + Merge Join Operator [MERGEJOIN_288] (rows=17130654 width=328) + Conds:RS_342._col0=RS_352._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] PartitionCols:_col0 - Select Operator [SEL_330] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=26666666 width=212) + Select Operator [SEL_341] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_340] (rows=17130654 width=212) predicate:(_col3 > 0) - Select Operator [SEL_328] (rows=80000000 width=212) + Select Operator [SEL_339] (rows=51391963 width=212) Output:["_col0","_col3"] - Group By Operator [GBY_327] (rows=80000000 width=392) + Group By Operator [GBY_338] (rows=51391963 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_36] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_57] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_35] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_277] (rows=187573258 width=377) - Conds:RS_31._col1=RS_316._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_316] + Group By Operator [GBY_56] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_285] (rows=51391963 width=391) + Conds:RS_294._col0=RS_53._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_31] + Please refer to the previous Select Operator [SEL_291] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_276] (rows=187573258 width=101) - Conds:RS_326._col0=RS_297._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] + Merge Join Operator [MERGEJOIN_284] (rows=51391963 width=115) + Conds:RS_337._col0=RS_306._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] PartitionCols:_col0 - Select Operator [SEL_290] (rows=652 width=4) + Select Operator [SEL_300] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_287] (rows=652 width=8) + Filter Operator [FIL_297] (rows=652 width=8) predicate:((d_year = 2001) and (d_year) IN (2001, 2002)) - Please refer to the previous TableScan [TS_62] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + Please refer to the previous TableScan [TS_5] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_337] PartitionCols:_col0 - Select Operator [SEL_325] (rows=525327388 width=114) + Select Operator [SEL_336] (rows=143930993 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_324] (rows=525327388 width=114) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_323] - Group By Operator [GBY_322] (rows=1 width=12) + Filter Operator [FIL_335] (rows=143930993 width=119) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_49_date_dim_d_date_sk_min) AND DynamicValue(RS_49_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_49_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_42] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_334] + Group By Operator [GBY_333] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_306] - Group By Operator [GBY_302] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_316] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=652 width=4) + Select Operator [SEL_307] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Please refer to the previous Select Operator [SEL_300] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_352] PartitionCols:_col0 - Select Operator [SEL_320] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_319] (rows=17130654 width=212) + Select Operator [SEL_351] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_350] (rows=26666666 width=212) predicate:(_col3 > 0) - Select Operator [SEL_318] (rows=51391963 width=212) + Select Operator [SEL_349] (rows=80000000 width=212) Output:["_col0","_col3"] - Group By Operator [GBY_317] (rows=51391963 width=392) + Group By Operator [GBY_348] (rows=80000000 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_275] (rows=51391963 width=391) - Conds:RS_11._col1=RS_315._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Group By Operator [GBY_77] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_287] (rows=187573258 width=377) + Conds:RS_295._col0=RS_74._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_312] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] + Please refer to the previous Select Operator [SEL_291] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_274] (rows=51391963 width=115) - Conds:RS_311._col0=RS_295._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] + Merge Join Operator [MERGEJOIN_286] (rows=187573258 width=101) + Conds:RS_347._col0=RS_308._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_308] PartitionCols:_col0 - Select Operator [SEL_289] (rows=652 width=4) + Select Operator [SEL_301] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_286] (rows=652 width=8) + Filter Operator [FIL_298] (rows=652 width=8) predicate:((d_year = 2001) and (d_year) IN (2001, 2002)) - Please refer to the previous TableScan [TS_62] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] + Please refer to the previous TableScan [TS_5] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_347] PartitionCols:_col0 - Select Operator [SEL_310] (rows=143930993 width=119) + Select Operator [SEL_346] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=143930993 width=119) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) + Filter Operator [FIL_345] (rows=525327388 width=114) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_63] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_344] + Group By Operator [GBY_343] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_301] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_317] + Group By Operator [GBY_313] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=652 width=4) + Select Operator [SEL_309] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] + Please refer to the previous Select Operator [SEL_301] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_330] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_36] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=391) + Conds:RS_293._col0=RS_33._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_291] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_282] (rows=51391963 width=115) + Conds:RS_329._col0=RS_304._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_299] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] + PartitionCols:_col0 + Select Operator [SEL_328] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_327] (rows=143930993 width=119) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_29_date_dim_d_date_sk_min) AND DynamicValue(RS_29_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_29_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_22] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_326] + Group By Operator [GBY_325] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_315] + Group By Operator [GBY_311] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_305] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_299] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out index a705dd86d5..2dc70b76d9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out @@ -157,432 +157,432 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 39 <- Reducer 16 (BROADCAST_EDGE) -Map 41 <- Reducer 20 (BROADCAST_EDGE) -Map 43 <- Reducer 28 (BROADCAST_EDGE) -Map 44 <- Reducer 32 (BROADCAST_EDGE) -Map 45 <- Reducer 36 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 11 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) -Reducer 14 <- Map 37 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 40 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 16 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 11 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) -Reducer 18 <- Map 37 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 42 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 11 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) -Reducer 22 <- Map 37 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 38 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 27 <- Union 26 (SIMPLE_EDGE) -Reducer 28 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 11 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) -Reducer 3 <- Map 37 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 37 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 40 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 32 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 11 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) -Reducer 34 <- Map 37 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 35 <- Map 42 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 36 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Map 38 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 27 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Map 14 <- Reducer 18 (BROADCAST_EDGE) +Map 40 <- Reducer 22 (BROADCAST_EDGE) +Map 42 <- Reducer 26 (BROADCAST_EDGE) +Map 43 <- Reducer 29 (BROADCAST_EDGE) +Map 44 <- Reducer 33 (BROADCAST_EDGE) +Map 45 <- Reducer 37 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Map 38 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 20 <- Map 38 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 39 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 22 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 17 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) +Reducer 24 <- Map 38 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 41 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 26 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 17 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) +Reducer 28 <- Map 38 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 17 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) +Reducer 31 <- Map 38 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 39 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 33 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 17 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) +Reducer 35 <- Map 38 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 36 <- Map 41 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 37 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE), Union 10 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_612] - Select Operator [SEL_611] (rows=100 width=160) + Reducer 8 vectorized + File Output Operator [FS_630] + Select Operator [SEL_629] (rows=100 width=160) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Limit [LIM_610] (rows=100 width=152) + Limit [LIM_628] (rows=100 width=152) Number of rows:100 - Select Operator [SEL_609] (rows=3422897230256 width=151) + Select Operator [SEL_627] (rows=3422897230256 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_169] - Select Operator [SEL_168] (rows=3422897230256 width=151) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_175] + Select Operator [SEL_174] (rows=3422897230256 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_258] (rows=3422897230256 width=255) + Top N Key Operator [TNK_264] (rows=3422897230256 width=255) keys:(_col10 - _col4),top n:100 - Filter Operator [FIL_167] (rows=3422897230256 width=255) + Filter Operator [FIL_173] (rows=3422897230256 width=255) predicate:((CAST( _col10 AS decimal(17,2)) / CAST( _col4 AS decimal(17,2))) < 0.9) - Merge Join Operator [MERGEJOIN_513] (rows=10268691690770 width=255) - Conds:RS_605._col0, _col1, _col2, _col3=RS_608._col0, _col1, _col2, _col3(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col10","_col11"] - <-Reducer 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_608] + Merge Join Operator [MERGEJOIN_531] (rows=10268691690770 width=255) + Conds:RS_623._col0, _col1, _col2, _col3=RS_626._col0, _col1, _col2, _col3(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col10","_col11"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_626] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_607] (rows=84235776 width=135) + Group By Operator [GBY_625] (rows=84235776 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 - Group By Operator [GBY_606] (rows=736356923 width=131) + Group By Operator [GBY_624] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 26 [SIMPLE_EDGE] - <-Reducer 25 [CONTAINS] vectorized - Reduce Output Operator [RS_636] + <-Union 12 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_638] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_635] (rows=736356923 width=131) + Group By Operator [GBY_637] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_634] (rows=621178955 width=131) + Group By Operator [GBY_636] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 24 [SIMPLE_EDGE] - <-Reducer 23 [CONTAINS] - Reduce Output Operator [RS_537] + <-Union 10 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] + Reduce Output Operator [RS_564] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_536] (rows=621178955 width=131) + Group By Operator [GBY_563] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_534] (rows=170474971 width=131) + Select Operator [SEL_561] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_533] (rows=170474971 width=234) - Conds:RS_99._col1, _col2=RS_599._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_599] + Merge Join Operator [MERGEJOIN_560] (rows=450703984 width=204) + Conds:RS_646._col0, _col1=RS_126._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_646] PartitionCols:_col0, _col1 - Select Operator [SEL_597] (rows=28798881 width=121) + Select Operator [SEL_644] (rows=57591150 width=119) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_9] (rows=28798881 width=121) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_99] + TableScan [TS_22] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_126] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_505] (rows=96821196 width=138) - Conds:RS_96._col1=RS_594._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_594] + Merge Join Operator [MERGEJOIN_526] (rows=187186493 width=124) + Conds:RS_121._col1=RS_616._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_616] PartitionCols:_col0 - Select Operator [SEL_590] (rows=45745 width=19) + Select Operator [SEL_611] (rows=45745 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_589] (rows=45745 width=109) + Filter Operator [FIL_610] (rows=45745 width=109) predicate:((i_category = 'Sports') and i_manufact_id is not null and i_category_id is not null and i_brand_id is not null and i_class_id is not null) - TableScan [TS_6] (rows=462000 width=109) + TableScan [TS_8] (rows=462000 width=109) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id","i_category","i_manufact_id"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_96] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_121] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_504] (rows=101592102 width=122) - Conds:RS_633._col0=RS_566._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_566] + Merge Join Operator [MERGEJOIN_525] (rows=196410188 width=109) + Conds:RS_659._col0=RS_589._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_589] PartitionCols:_col0 - Select Operator [SEL_557] (rows=652 width=4) + Select Operator [SEL_579] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_553] (rows=652 width=8) + Filter Operator [FIL_575] (rows=652 width=8) predicate:(d_year = 2002) - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_5] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_633] + <-Map 44 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_659] PartitionCols:_col0 - Select Operator [SEL_632] (rows=286549727 width=127) + Select Operator [SEL_658] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_631] (rows=286549727 width=127) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_94_date_dim_d_date_sk_min) AND DynamicValue(RS_94_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_94_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_82] (rows=287989836 width=127) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_630] - Group By Operator [GBY_629] (rows=1 width=12) + Filter Operator [FIL_657] (rows=550076554 width=122) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_119_date_dim_d_date_sk_min) AND DynamicValue(RS_119_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_119_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_109] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_656] + Group By Operator [GBY_655] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_581] - Group By Operator [GBY_575] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_603] + Group By Operator [GBY_597] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_567] (rows=652 width=4) + Select Operator [SEL_590] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_557] - <-Reducer 31 [CONTAINS] - Reduce Output Operator [RS_546] + Please refer to the previous Select Operator [SEL_579] + <-Reducer 9 [CONTAINS] + Reduce Output Operator [RS_545] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_545] (rows=621178955 width=131) + Group By Operator [GBY_544] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_543] (rows=450703984 width=131) + Select Operator [SEL_542] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_542] (rows=450703984 width=204) - Conds:RS_120._col1, _col2=RS_620._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_620] + Merge Join Operator [MERGEJOIN_541] (rows=170474971 width=234) + Conds:RS_572._col0, _col1=RS_104._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_572] PartitionCols:_col0, _col1 - Select Operator [SEL_618] (rows=57591150 width=119) + Select Operator [SEL_570] (rows=28798881 width=121) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_30] (rows=57591150 width=119) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_120] + TableScan [TS_0] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_104] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_508] (rows=187186493 width=124) - Conds:RS_117._col1=RS_595._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_595] + Merge Join Operator [MERGEJOIN_523] (rows=96821196 width=138) + Conds:RS_99._col1=RS_615._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_615] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_590] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_117] + Please refer to the previous Select Operator [SEL_611] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_99] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_507] (rows=196410188 width=109) - Conds:RS_641._col0=RS_568._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_568] + Merge Join Operator [MERGEJOIN_522] (rows=101592102 width=122) + Conds:RS_635._col0=RS_587._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_587] PartitionCols:_col0 - Select Operator [SEL_558] (rows=652 width=4) + Select Operator [SEL_578] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_554] (rows=652 width=8) + Filter Operator [FIL_574] (rows=652 width=8) predicate:(d_year = 2002) - Please refer to the previous TableScan [TS_3] - <-Map 44 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_641] + Please refer to the previous TableScan [TS_5] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_635] PartitionCols:_col0 - Select Operator [SEL_640] (rows=550076554 width=122) + Select Operator [SEL_634] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_639] (rows=550076554 width=122) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_115_date_dim_d_date_sk_min) AND DynamicValue(RS_115_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_115_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_103] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_638] - Group By Operator [GBY_637] (rows=1 width=12) + Filter Operator [FIL_633] (rows=286549727 width=127) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_87] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_632] + Group By Operator [GBY_631] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_582] - Group By Operator [GBY_576] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_602] + Group By Operator [GBY_596] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_569] (rows=652 width=4) + Select Operator [SEL_588] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_558] - <-Reducer 35 [CONTAINS] - Reduce Output Operator [RS_551] + Please refer to the previous Select Operator [SEL_578] + <-Reducer 36 [CONTAINS] + Reduce Output Operator [RS_569] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_550] (rows=736356923 width=131) + Group By Operator [GBY_568] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_548] (rows=115177968 width=131) + Select Operator [SEL_566] (rows=115177968 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_547] (rows=115177968 width=220) - Conds:RS_148._col1, _col2=RS_628._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_628] + Merge Join Operator [MERGEJOIN_565] (rows=115177968 width=220) + Conds:RS_654._col0, _col1=RS_155._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_654] PartitionCols:_col0, _col1 - Select Operator [SEL_626] (rows=14398467 width=118) + Select Operator [SEL_652] (rows=14398467 width=118) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_58] (rows=14398467 width=118) + TableScan [TS_51] (rows=14398467 width=118) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_148] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_155] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_511] (rows=48990732 width=139) - Conds:RS_145._col1=RS_596._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_596] + Merge Join Operator [MERGEJOIN_529] (rows=48990732 width=139) + Conds:RS_150._col1=RS_617._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_617] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_590] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_145] + Please refer to the previous Select Operator [SEL_611] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_150] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_510] (rows=51404771 width=123) - Conds:RS_646._col0=RS_570._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_570] + Merge Join Operator [MERGEJOIN_528] (rows=51404771 width=123) + Conds:RS_664._col0=RS_591._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_591] PartitionCols:_col0 - Select Operator [SEL_559] (rows=652 width=4) + Select Operator [SEL_580] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_555] (rows=652 width=8) + Filter Operator [FIL_576] (rows=652 width=8) predicate:(d_year = 2002) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_5] <-Map 45 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_646] + SHUFFLE [RS_664] PartitionCols:_col0 - Select Operator [SEL_645] (rows=143966864 width=127) + Select Operator [SEL_663] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_644] (rows=143966864 width=127) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_143_date_dim_d_date_sk_min) AND DynamicValue(RS_143_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_143_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_131] (rows=144002668 width=127) + Filter Operator [FIL_662] (rows=143966864 width=127) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_148_date_dim_d_date_sk_min) AND DynamicValue(RS_148_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_148_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_138] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_643] - Group By Operator [GBY_642] (rows=1 width=12) + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_661] + Group By Operator [GBY_660] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_583] - Group By Operator [GBY_577] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_604] + Group By Operator [GBY_598] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_571] (rows=652 width=4) + Select Operator [SEL_592] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_559] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_605] + Please refer to the previous Select Operator [SEL_580] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_623] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_604] (rows=84235776 width=135) + Group By Operator [GBY_622] (rows=84235776 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 - Group By Operator [GBY_603] (rows=736356923 width=131) + Group By Operator [GBY_621] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 19 [CONTAINS] - Reduce Output Operator [RS_532] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 25 [CONTAINS] + Reduce Output Operator [RS_559] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_531] (rows=736356923 width=131) + Group By Operator [GBY_558] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_529] (rows=115177968 width=131) + Select Operator [SEL_556] (rows=115177968 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_528] (rows=115177968 width=220) - Conds:RS_66._col1, _col2=RS_627._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_627] + Merge Join Operator [MERGEJOIN_555] (rows=115177968 width=220) + Conds:RS_653._col0, _col1=RS_70._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_653] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_626] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_66] + Please refer to the previous Select Operator [SEL_652] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_70] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_502] (rows=48990732 width=139) - Conds:RS_63._col1=RS_593._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_593] + Merge Join Operator [MERGEJOIN_520] (rows=48990732 width=139) + Conds:RS_65._col1=RS_614._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_614] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_590] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Please refer to the previous Select Operator [SEL_611] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_65] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_501] (rows=51404771 width=123) - Conds:RS_625._col0=RS_564._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_564] + Merge Join Operator [MERGEJOIN_519] (rows=51404771 width=123) + Conds:RS_651._col0=RS_585._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_585] PartitionCols:_col0 - Select Operator [SEL_556] (rows=652 width=4) + Select Operator [SEL_577] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_552] (rows=652 width=8) + Filter Operator [FIL_573] (rows=652 width=8) predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_3] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_625] + Please refer to the previous TableScan [TS_5] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_651] PartitionCols:_col0 - Select Operator [SEL_624] (rows=143966864 width=127) + Select Operator [SEL_650] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_623] (rows=143966864 width=127) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_49] (rows=144002668 width=127) + Filter Operator [FIL_649] (rows=143966864 width=127) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_63_date_dim_d_date_sk_min) AND DynamicValue(RS_63_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_63_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_53] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_622] - Group By Operator [GBY_621] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_648] + Group By Operator [GBY_647] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_580] - Group By Operator [GBY_574] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_601] + Group By Operator [GBY_595] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_565] (rows=652 width=4) + Select Operator [SEL_586] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_556] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_602] + Please refer to the previous Select Operator [SEL_577] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_620] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_601] (rows=736356923 width=131) + Group By Operator [GBY_619] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_600] (rows=621178955 width=131) + Group By Operator [GBY_618] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_527] + <-Union 3 [SIMPLE_EDGE] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_536] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_526] (rows=621178955 width=131) + Group By Operator [GBY_535] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_524] (rows=450703984 width=131) + Select Operator [SEL_533] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_523] (rows=450703984 width=204) - Conds:RS_38._col1, _col2=RS_619._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_619] + Merge Join Operator [MERGEJOIN_532] (rows=170474971 width=234) + Conds:RS_571._col0, _col1=RS_19._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_571] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_618] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_38] + Please refer to the previous Select Operator [SEL_570] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_499] (rows=187186493 width=124) - Conds:RS_35._col1=RS_592._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_592] + Merge Join Operator [MERGEJOIN_514] (rows=96821196 width=138) + Conds:RS_14._col1=RS_612._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_612] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_590] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_35] + Please refer to the previous Select Operator [SEL_611] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_498] (rows=196410188 width=109) - Conds:RS_617._col0=RS_562._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_562] + Merge Join Operator [MERGEJOIN_513] (rows=101592102 width=122) + Conds:RS_609._col0=RS_581._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_581] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_556] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_617] + Please refer to the previous Select Operator [SEL_577] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_609] PartitionCols:_col0 - Select Operator [SEL_616] (rows=550076554 width=122) + Select Operator [SEL_608] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_615] (rows=550076554 width=122) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_21] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_614] - Group By Operator [GBY_613] (rows=1 width=12) + Filter Operator [FIL_607] (rows=286549727 width=127) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_606] + Group By Operator [GBY_605] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_579] - Group By Operator [GBY_573] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_599] + Group By Operator [GBY_593] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_563] (rows=652 width=4) + Select Operator [SEL_582] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_556] - <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_518] + Please refer to the previous Select Operator [SEL_577] + <-Reducer 21 [CONTAINS] + Reduce Output Operator [RS_554] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_517] (rows=621178955 width=131) + Group By Operator [GBY_553] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_515] (rows=170474971 width=131) + Select Operator [SEL_551] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_514] (rows=170474971 width=234) - Conds:RS_17._col1, _col2=RS_598._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_598] + Merge Join Operator [MERGEJOIN_550] (rows=450703984 width=204) + Conds:RS_645._col0, _col1=RS_41._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_645] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_597] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + Please refer to the previous Select Operator [SEL_644] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_496] (rows=96821196 width=138) - Conds:RS_14._col1=RS_591._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_591] + Merge Join Operator [MERGEJOIN_517] (rows=187186493 width=124) + Conds:RS_36._col1=RS_613._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_613] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_590] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_14] + Please refer to the previous Select Operator [SEL_611] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_36] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_495] (rows=101592102 width=122) - Conds:RS_588._col0=RS_560._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_560] + Merge Join Operator [MERGEJOIN_516] (rows=196410188 width=109) + Conds:RS_643._col0=RS_583._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_583] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_556] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_588] + Please refer to the previous Select Operator [SEL_577] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_643] PartitionCols:_col0 - Select Operator [SEL_587] (rows=286549727 width=127) + Select Operator [SEL_642] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_586] (rows=286549727 width=127) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=127) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_585] - Group By Operator [GBY_584] (rows=1 width=12) + Filter Operator [FIL_641] (rows=550076554 width=122) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_640] + Group By Operator [GBY_639] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_578] - Group By Operator [GBY_572] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_600] + Group By Operator [GBY_594] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_561] (rows=652 width=4) + Select Operator [SEL_584] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_556] + Please refer to the previous Select Operator [SEL_577] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out index c90b253628..e6fc709a88 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out @@ -217,321 +217,321 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 29 <- Reducer 19 (BROADCAST_EDGE) -Map 33 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 27 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 32 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 12 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 27 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 36 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 25 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Map 18 <- Reducer 23 (BROADCAST_EDGE) +Map 30 <- Reducer 25 (BROADCAST_EDGE) +Map 33 <- Reducer 27 (BROADCAST_EDGE) +Reducer 10 <- Map 7 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 11 <- Map 28 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 29 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 14 <- Map 7 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 15 <- Map 28 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 36 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 25 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 22 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 27 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) -Reducer 4 <- Map 26 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 27 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 28 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 9 <- Union 8 (SIMPLE_EDGE) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 9 <- Map 28 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_435] - Limit [LIM_434] (rows=100 width=619) + Reducer 6 vectorized + File Output Operator [FS_438] + Limit [LIM_437] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_433] (rows=59581 width=619) + Select Operator [SEL_436] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_432] - Select Operator [SEL_431] (rows=59581 width=619) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_435] + Select Operator [SEL_434] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_430] (rows=59581 width=627) + Group By Operator [GBY_433] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 8 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_449] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 13 [CONTAINS] vectorized + Reduce Output Operator [RS_452] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_448] (rows=59581 width=627) + Group By Operator [GBY_451] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_447] (rows=39721 width=618) + Top N Key Operator [TNK_450] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_446] (rows=38846 width=619) + Select Operator [SEL_449] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_445] (rows=38846 width=436) + Group By Operator [GBY_448] (rows=38846 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_71] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_75] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=427306 width=436) + Group By Operator [GBY_74] (rows=427306 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_68] (rows=8592843 width=305) + Select Operator [SEL_72] (rows=8592843 width=305) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_368] (rows=8592843 width=305) - Conds:RS_65._col1=RS_444._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_444] + Merge Join Operator [MERGEJOIN_371] (rows=8592843 width=305) + Conds:RS_447._col0=RS_70._col2(Inner),Output:["_col1","_col8","_col9","_col12","_col13"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_447] PartitionCols:_col0 - Select Operator [SEL_443] (rows=46000 width=104) + Select Operator [SEL_446] (rows=46000 width=104) Output:["_col0","_col1"] - TableScan [TS_51] (rows=46000 width=104) + TableScan [TS_39] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_367] (rows=8592843 width=208) - Conds:RS_62._col3=RS_421._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_421] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_370] (rows=8592843 width=208) + Conds:RS_65._col4=RS_426._col0(Inner),Output:["_col2","_col6","_col7","_col10","_col11"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_426] PartitionCols:_col0 - Select Operator [SEL_419] (rows=1150 width=4) + Select Operator [SEL_424] (rows=1150 width=4) Output:["_col0"] - Filter Operator [FIL_418] (rows=1150 width=89) + Filter Operator [FIL_423] (rows=1150 width=89) predicate:(p_channel_tv = 'N') - TableScan [TS_11] (rows=2300 width=89) + TableScan [TS_20] (rows=2300 width=89) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_tv"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_366] (rows=17185686 width=222) - Conds:RS_59._col2=RS_416._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_416] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_369] (rows=17185686 width=222) + Conds:RS_400._col0=RS_63._col2(Inner),Output:["_col2","_col4","_col6","_col7","_col10","_col11"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_400] PartitionCols:_col0 - Select Operator [SEL_414] (rows=154000 width=4) + Select Operator [SEL_398] (rows=154000 width=4) Output:["_col0"] - Filter Operator [FIL_413] (rows=154000 width=115) + Filter Operator [FIL_397] (rows=154000 width=115) predicate:(i_current_price > 50) - TableScan [TS_8] (rows=462000 width=115) + TableScan [TS_2] (rows=462000 width=115) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_59] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_365] (rows=51557056 width=232) - Conds:RS_56._col0=RS_396._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_396] + Merge Join Operator [MERGEJOIN_368] (rows=51557056 width=232) + Conds:RS_55._col0=RS_406._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_406] PartitionCols:_col0 - Select Operator [SEL_393] (rows=8116 width=4) + Select Operator [SEL_403] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_392] (rows=8116 width=98) + Filter Operator [FIL_402] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' - TableScan [TS_5] (rows=73049 width=98) + TableScan [TS_10] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_56] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_364] (rows=464045263 width=326) - Conds:RS_440._col2, _col4=RS_442._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_440] + Merge Join Operator [MERGEJOIN_367] (rows=464045263 width=326) + Conds:RS_443._col2, _col4=RS_445._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_443] PartitionCols:_col2, _col4 - Select Operator [SEL_439] (rows=283691906 width=243) + Select Operator [SEL_442] (rows=283691906 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_438] (rows=283691906 width=243) - predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_37] (rows=287989836 width=243) + Filter Operator [FIL_441] (rows=283691906 width=243) + predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_56_date_dim_d_date_sk_min) AND DynamicValue(RS_56_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_56_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_44] (rows=287989836 width=243) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_437] - Group By Operator [GBY_436] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_440] + Group By Operator [GBY_439] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_404] - Group By Operator [GBY_401] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_414] + Group By Operator [GBY_411] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_397] (rows=8116 width=4) + Select Operator [SEL_407] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_393] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_442] + Please refer to the previous Select Operator [SEL_403] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_445] PartitionCols:_col0, _col1 - Select Operator [SEL_441] (rows=28798881 width=227) + Select Operator [SEL_444] (rows=28798881 width=227) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_40] (rows=28798881 width=227) + TableScan [TS_47] (rows=28798881 width=227) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] - <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_463] + <-Reducer 17 [CONTAINS] vectorized + Reduce Output Operator [RS_466] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_462] (rows=59581 width=627) + Group By Operator [GBY_465] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_461] (rows=39721 width=618) + Top N Key Operator [TNK_464] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_460] (rows=53 width=615) + Select Operator [SEL_463] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_459] (rows=53 width=436) + Group By Operator [GBY_462] (rows=53 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_109] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=318 width=436) + Group By Operator [GBY_114] (rows=318 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_106] (rows=4714659 width=323) + Select Operator [SEL_112] (rows=4714659 width=323) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_373] (rows=4714659 width=323) - Conds:RS_103._col2=RS_458._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_376] (rows=4714659 width=323) + Conds:RS_109._col4=RS_461._col0(Inner),Output:["_col7","_col8","_col11","_col12","_col15"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_458] + SHUFFLE [RS_461] PartitionCols:_col0 - Select Operator [SEL_457] (rows=84 width=104) + Select Operator [SEL_460] (rows=84 width=104) Output:["_col0","_col1"] - TableScan [TS_89] (rows=84 width=104) + TableScan [TS_104] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_103] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_372] (rows=4714659 width=227) - Conds:RS_100._col3=RS_422._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_422] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_375] (rows=4714659 width=227) + Conds:RS_427._col0=RS_107._col4(Inner),Output:["_col4","_col7","_col8","_col11","_col12"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_427] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_419] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_371] (rows=9429318 width=231) - Conds:RS_97._col1=RS_417._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_417] + Please refer to the previous Select Operator [SEL_424] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_107] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_374] (rows=9429318 width=231) + Conds:RS_401._col0=RS_101._col1(Inner),Output:["_col3","_col4","_col6","_col7","_col10","_col11"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_401] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_414] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_97] + Please refer to the previous Select Operator [SEL_398] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_101] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_370] (rows=28287952 width=235) - Conds:RS_94._col0=RS_398._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_398] + Merge Join Operator [MERGEJOIN_373] (rows=28287952 width=235) + Conds:RS_96._col0=RS_408._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_408] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_393] + Please refer to the previous Select Operator [SEL_403] <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_94] + SHUFFLE [RS_96] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_369] (rows=254608997 width=363) - Conds:RS_454._col1, _col4=RS_456._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_372] (rows=254608997 width=363) + Conds:RS_457._col1, _col4=RS_459._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_454] + SHUFFLE [RS_457] PartitionCols:_col1, _col4 - Select Operator [SEL_453] (rows=143894769 width=243) + Select Operator [SEL_456] (rows=143894769 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_452] (rows=143894769 width=243) - predicate:(ws_promo_sk is not null and ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_75] (rows=144002668 width=243) + Filter Operator [FIL_455] (rows=143894769 width=243) + predicate:(ws_promo_sk is not null and ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_85] (rows=144002668 width=243) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_451] - Group By Operator [GBY_450] (rows=1 width=12) + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_454] + Group By Operator [GBY_453] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_405] - Group By Operator [GBY_402] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_415] + Group By Operator [GBY_412] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_399] (rows=8116 width=4) + Select Operator [SEL_409] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_393] + Please refer to the previous Select Operator [SEL_403] <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_456] + SHUFFLE [RS_459] PartitionCols:_col0, _col1 - Select Operator [SEL_455] (rows=14398467 width=221) + Select Operator [SEL_458] (rows=14398467 width=221) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_78] (rows=14398467 width=221) + TableScan [TS_88] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] - <-Reducer 7 [CONTAINS] vectorized - Reduce Output Operator [RS_429] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_432] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_428] (rows=59581 width=627) + Group By Operator [GBY_431] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_427] (rows=39721 width=618) + Top N Key Operator [TNK_430] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_426] (rows=822 width=617) + Select Operator [SEL_429] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_425] (rows=822 width=436) + Group By Operator [GBY_428] (rows=822 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_34] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=4932 width=436) + Group By Operator [GBY_35] (rows=4932 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_31] (rows=15038783 width=100) + Select Operator [SEL_33] (rows=15038783 width=100) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_363] (rows=15038783 width=100) - Conds:RS_28._col2=RS_424._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_424] + Merge Join Operator [MERGEJOIN_366] (rows=15038783 width=100) + Conds:RS_396._col0=RS_31._col3(Inner),Output:["_col1","_col8","_col9","_col12","_col13"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_396] PartitionCols:_col0 - Select Operator [SEL_423] (rows=1704 width=104) + Select Operator [SEL_395] (rows=1704 width=104) Output:["_col0","_col1"] - TableScan [TS_14] (rows=1704 width=104) + TableScan [TS_0] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_362] (rows=15038783 width=0) - Conds:RS_25._col3=RS_420._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_420] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_365] (rows=15038783 width=0) + Conds:RS_26._col4=RS_425._col0(Inner),Output:["_col3","_col6","_col7","_col10","_col11"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_425] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_419] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_361] (rows=30077566 width=57) - Conds:RS_22._col1=RS_415._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_415] + Please refer to the previous Select Operator [SEL_424] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_364] (rows=30077566 width=57) + Conds:RS_399._col0=RS_24._col1(Inner),Output:["_col3","_col4","_col6","_col7","_col10","_col11"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_399] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_414] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_22] + Please refer to the previous Select Operator [SEL_398] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_360] (rows=90232695 width=177) - Conds:RS_19._col0=RS_394._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_394] + Merge Join Operator [MERGEJOIN_363] (rows=90232695 width=177) + Conds:RS_16._col0=RS_404._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_404] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_393] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_19] + Please refer to the previous Select Operator [SEL_403] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_359] (rows=812149846 width=374) - Conds:RS_410._col1, _col4=RS_412._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_410] + Merge Join Operator [MERGEJOIN_362] (rows=812149846 width=374) + Conds:RS_420._col1, _col4=RS_422._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_420] PartitionCols:_col1, _col4 - Select Operator [SEL_409] (rows=501693263 width=233) + Select Operator [SEL_419] (rows=501693263 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_408] (rows=501693263 width=233) - predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=233) + Filter Operator [FIL_418] (rows=501693263 width=233) + predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_5] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_407] - Group By Operator [GBY_406] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_417] + Group By Operator [GBY_416] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_403] - Group By Operator [GBY_400] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_413] + Group By Operator [GBY_410] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_395] (rows=8116 width=4) + Select Operator [SEL_405] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_393] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_412] + Please refer to the previous Select Operator [SEL_403] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_422] PartitionCols:_col0, _col1 - Select Operator [SEL_411] (rows=57591150 width=224) + Select Operator [SEL_421] (rows=57591150 width=224) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_3] (rows=57591150 width=224) + TableScan [TS_8] (rows=57591150 width=224) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out index e07e6a2f77..d128146558 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out @@ -71,156 +71,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 10 <- Map 6 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 7 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Map 15 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 vectorized - File Output Operator [FS_216] - Select Operator [SEL_215] (rows=100 width=1503) + File Output Operator [FS_218] + Select Operator [SEL_217] (rows=100 width=1503) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Limit [LIM_214] (rows=100 width=1417) + Limit [LIM_216] (rows=100 width=1417) Number of rows:100 - Select Operator [SEL_213] (rows=1609248 width=1416) + Select Operator [SEL_215] (rows=1609248 width=1416) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_66] - Select Operator [SEL_65] (rows=1609248 width=1416) + SHUFFLE [RS_68] + Select Operator [SEL_67] (rows=1609248 width=1416) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Top N Key Operator [TNK_105] (rows=1609248 width=1418) + Top N Key Operator [TNK_107] (rows=1609248 width=1418) keys:_col1, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col19,top n:100 - Merge Join Operator [MERGEJOIN_182] (rows=1609248 width=1418) - Conds:RS_62._col0=RS_63._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Merge Join Operator [MERGEJOIN_184] (rows=1609248 width=1418) + Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_58] (rows=1609248 width=227) + Merge Join Operator [MERGEJOIN_178] (rows=1568628 width=1310) + Conds:RS_187._col2=RS_190._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col2 + Select Operator [SEL_186] (rows=80000000 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_185] (rows=80000000 width=375) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=375) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=784314 width=941) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_188] (rows=784314 width=1027) + predicate:(ca_state = 'IL') + TableScan [TS_3] (rows=40000000 width=1027) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=1609248 width=227) Output:["_col0","_col2"] - Filter Operator [FIL_57] (rows=1609248 width=227) + Filter Operator [FIL_59] (rows=1609248 width=227) predicate:(_col2 > _col3) - Merge Join Operator [MERGEJOIN_181] (rows=4827746 width=227) - Conds:RS_206._col1=RS_212._col1(Inner),Output:["_col0","_col2","_col3"] - <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + Merge Join Operator [MERGEJOIN_183] (rows=4827746 width=227) + Conds:RS_208._col1=RS_214._col1(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] PartitionCols:_col1 - Select Operator [SEL_211] (rows=12 width=198) + Select Operator [SEL_213] (rows=12 width=198) Output:["_col0","_col1"] - Filter Operator [FIL_210] (rows=12 width=206) + Filter Operator [FIL_212] (rows=12 width=206) predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_209] (rows=12 width=206) + Group By Operator [GBY_211] (rows=12 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 - Select Operator [SEL_208] (rows=5266632 width=201) + Select Operator [SEL_210] (rows=5266632 width=201) Output:["_col0","_col2"] - Group By Operator [GBY_207] (rows=5266632 width=201) + Group By Operator [GBY_209] (rows=5266632 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_45] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=8749496 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_180] (rows=8749496 width=194) - Conds:RS_40._col2=RS_202._col0(Inner),Output:["_col1","_col3","_col6"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + Group By Operator [GBY_46] (rows=8749496 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_182] (rows=8749496 width=194) + Conds:RS_194._col0=RS_43._col2(Inner),Output:["_col1","_col3","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_200] (rows=40000000 width=90) + Select Operator [SEL_192] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_199] (rows=40000000 width=90) + Filter Operator [FIL_191] (rows=40000000 width=90) predicate:ca_state is not null - TableScan [TS_12] (rows=40000000 width=90) + TableScan [TS_6] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_179] (rows=8749496 width=112) - Conds:RS_194._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_181] (rows=8749496 width=112) + Conds:RS_200._col0=RS_204._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_196] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_195] (rows=652 width=8) - predicate:(d_year = 1998) - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] - PartitionCols:_col0 - Select Operator [SEL_192] (rows=28221532 width=121) + Select Operator [SEL_198] (rows=28221532 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_190] (rows=28221532 width=121) + Filter Operator [FIL_196] (rows=28221532 width=121) predicate:(cr_returning_addr_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_6] (rows=28798881 width=121) + TableScan [TS_9] (rows=28798881 width=121) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] + PartitionCols:_col0 + Select Operator [SEL_202] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_201] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_12] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] PartitionCols:_col1 - Filter Operator [FIL_205] (rows=4827746 width=201) + Filter Operator [FIL_207] (rows=4827746 width=201) predicate:_col2 is not null - Select Operator [SEL_204] (rows=4827746 width=201) + Select Operator [SEL_206] (rows=4827746 width=201) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_203] (rows=4827746 width=201) + Group By Operator [GBY_205] (rows=4827746 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=8574602 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_178] (rows=8574602 width=194) - Conds:RS_18._col2=RS_201._col0(Inner),Output:["_col1","_col3","_col6"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Group By Operator [GBY_23] (rows=8574602 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_180] (rows=8574602 width=194) + Conds:RS_193._col0=RS_20._col2(Inner),Output:["_col1","_col3","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_200] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_18] + Please refer to the previous Select Operator [SEL_192] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_177] (rows=8574602 width=112) - Conds:RS_193._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_197] + Merge Join Operator [MERGEJOIN_179] (rows=8574602 width=112) + Conds:RS_199._col0=RS_203._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_196] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] - PartitionCols:_col0 - Select Operator [SEL_191] (rows=27657410 width=121) + Select Operator [SEL_197] (rows=27657410 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_189] (rows=27657410 width=121) + Filter Operator [FIL_195] (rows=27657410 width=121) predicate:(cr_returning_addr_sk is not null and cr_returning_customer_sk is not null and cr_returned_date_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_176] (rows=1568628 width=1310) - Conds:RS_185._col2=RS_188._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] - PartitionCols:_col2 - Select Operator [SEL_184] (rows=80000000 width=375) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_183] (rows=80000000 width=375) - predicate:c_current_addr_sk is not null - TableScan [TS_0] (rows=80000000 width=375) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] - PartitionCols:_col0 - Select Operator [SEL_187] (rows=784314 width=941) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_186] (rows=784314 width=1027) - predicate:(ca_state = 'IL') - TableScan [TS_3] (rows=40000000 width=1027) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + Please refer to the previous TableScan [TS_9] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_202] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out index 12f33788c2..846a073c61 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query83.q.out @@ -145,192 +145,192 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 3 <- Map 19 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 14 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 10 <- Map 21 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 20 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 22 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 14 <- Map 20 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_399] - Limit [LIM_398] (rows=100 width=260) + Reducer 7 vectorized + File Output Operator [FS_402] + Limit [LIM_401] (rows=100 width=260) Number of rows:100 - Select Operator [SEL_397] (rows=1260 width=260) + Select Operator [SEL_400] (rows=1260 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_121] - Select Operator [SEL_120] (rows=1260 width=260) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_124] + Select Operator [SEL_123] (rows=1260 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_207] (rows=1260 width=132) + Top N Key Operator [TNK_210] (rows=1260 width=132) keys:_col0, _col3,top n:100 - Merge Join Operator [MERGEJOIN_364] (rows=1260 width=132) - Conds:RS_117._col0=RS_396._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6"] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_396] + Merge Join Operator [MERGEJOIN_367] (rows=1260 width=132) + Conds:RS_120._col0=RS_399._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6"] + <-Reducer 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_399] PartitionCols:_col0 - Select Operator [SEL_395] (rows=1260 width=116) + Select Operator [SEL_398] (rows=1260 width=116) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_394] (rows=1260 width=108) + Group By Operator [GBY_397] (rows=1260 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_111] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_114] PartitionCols:_col0 - Group By Operator [GBY_110] (rows=1260 width=108) - Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 - Merge Join Operator [MERGEJOIN_362] (rows=2521 width=100) - Conds:RS_106._col4=RS_383._col0(Inner),Output:["_col5","_col7"] + Group By Operator [GBY_113] (rows=1260 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_365] (rows=2521 width=100) + Conds:RS_109._col1=RS_386._col0(Inner),Output:["_col2","_col7"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_383] + SHUFFLE [RS_386] PartitionCols:_col0 - Select Operator [SEL_380] (rows=462000 width=104) + Select Operator [SEL_383] (rows=462000 width=104) Output:["_col0","_col1"] - TableScan [TS_22] (rows=462000 width=104) + TableScan [TS_26] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_106] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_361] (rows=2521 width=4) - Conds:RS_103._col0=RS_393._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_103] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_364] (rows=2521 width=4) + Conds:RS_396._col0=RS_107._col0(Inner),Output:["_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_352] (rows=2 width=4) - Conds:RS_367._col1=RS_376._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_367] + Merge Join Operator [MERGEJOIN_355] (rows=2 width=4) + Conds:RS_373._col1=RS_382._col0(Inner),Output:["_col0"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_373] PartitionCols:_col1 - Select Operator [SEL_366] (rows=73049 width=98) + Select Operator [SEL_372] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_365] (rows=73049 width=98) + Filter Operator [FIL_371] (rows=73049 width=98) predicate:d_date is not null - TableScan [TS_0] (rows=73049 width=98) + TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_376] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_382] PartitionCols:_col0 - Group By Operator [GBY_375] (rows=2 width=94) + Group By Operator [GBY_381] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_16] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_15] (rows=2 width=94) + Group By Operator [GBY_18] (rows=2 width=94) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_351] (rows=5 width=94) - Conds:RS_370._col1=RS_374._col0(Left Semi),Output:["_col0"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_370] + Merge Join Operator [MERGEJOIN_354] (rows=5 width=94) + Conds:RS_376._col1=RS_380._col0(Left Semi),Output:["_col0"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_376] PartitionCols:_col1 - Select Operator [SEL_369] (rows=73049 width=98) + Select Operator [SEL_375] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_368] (rows=73049 width=98) + Filter Operator [FIL_374] (rows=73049 width=98) predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_374] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_380] PartitionCols:_col0 - Group By Operator [GBY_373] (rows=1 width=4) + Group By Operator [GBY_379] (rows=1 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_372] (rows=2 width=4) + Select Operator [SEL_378] (rows=2 width=4) Output:["_col0"] - Filter Operator [FIL_371] (rows=2 width=98) + Filter Operator [FIL_377] (rows=2 width=98) predicate:((d_date) IN ('1998-01-02', '1998-10-15', '1998-11-10') and d_week_seq is not null) - TableScan [TS_6] (rows=73049 width=98) + TableScan [TS_9] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_393] + SHUFFLE [RS_396] PartitionCols:_col0 - Select Operator [SEL_392] (rows=13749816 width=11) + Select Operator [SEL_395] (rows=13749816 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_391] (rows=13749816 width=11) + Filter Operator [FIL_394] (rows=13749816 width=11) predicate:wr_returned_date_sk is not null - TableScan [TS_95] (rows=14398467 width=11) + TableScan [TS_78] (rows=14398467 width=11) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_return_quantity"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_117] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_120] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_363] (rows=2739 width=116) - Conds:RS_385._col0=RS_390._col0(Inner),Output:["_col0","_col1","_col3"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_390] + Merge Join Operator [MERGEJOIN_366] (rows=2739 width=116) + Conds:RS_388._col0=RS_393._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_393] PartitionCols:_col0 - Group By Operator [GBY_389] (rows=5552 width=108) + Group By Operator [GBY_392] (rows=5552 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_73] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_75] PartitionCols:_col0 - Group By Operator [GBY_72] (rows=5552 width=108) - Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 - Merge Join Operator [MERGEJOIN_358] (rows=11105 width=100) - Conds:RS_68._col4=RS_382._col0(Inner),Output:["_col5","_col7"] + Group By Operator [GBY_74] (rows=5552 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_361] (rows=11105 width=100) + Conds:RS_70._col1=RS_385._col0(Inner),Output:["_col2","_col7"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_382] + SHUFFLE [RS_385] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_380] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_68] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_357] (rows=11105 width=4) - Conds:RS_65._col0=RS_388._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_65] + Please refer to the previous Select Operator [SEL_383] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_360] (rows=11105 width=4) + Conds:RS_391._col0=RS_68._col0(Inner),Output:["_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_68] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_352] + Please refer to the previous Merge Join Operator [MERGEJOIN_355] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_388] + SHUFFLE [RS_391] PartitionCols:_col0 - Select Operator [SEL_387] (rows=55578005 width=11) + Select Operator [SEL_390] (rows=55578005 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_386] (rows=55578005 width=11) + Filter Operator [FIL_389] (rows=55578005 width=11) predicate:sr_returned_date_sk is not null - TableScan [TS_57] (rows=57591150 width=11) + TableScan [TS_39] (rows=57591150 width=11) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_return_quantity"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_385] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_388] PartitionCols:_col0 - Group By Operator [GBY_384] (rows=2739 width=108) + Group By Operator [GBY_387] (rows=2739 width=108) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_35] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_34] (rows=2739 width=108) - Output:["_col0","_col1"],aggregations:["sum(_col5)"],keys:_col7 - Merge Join Operator [MERGEJOIN_354] (rows=5478 width=100) - Conds:RS_30._col4=RS_381._col0(Inner),Output:["_col5","_col7"] + Group By Operator [GBY_35] (rows=2739 width=108) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 + Merge Join Operator [MERGEJOIN_357] (rows=5478 width=100) + Conds:RS_31._col1=RS_384._col0(Inner),Output:["_col2","_col7"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_381] + SHUFFLE [RS_384] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_380] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_353] (rows=5478 width=4) - Conds:RS_27._col0=RS_379._col0(Inner),Output:["_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] + Please refer to the previous Select Operator [SEL_383] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_356] (rows=5478 width=4) + Conds:RS_370._col0=RS_29._col0(Inner),Output:["_col1","_col2"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_352] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_379] + Please refer to the previous Merge Join Operator [MERGEJOIN_355] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_370] PartitionCols:_col0 - Select Operator [SEL_378] (rows=28798881 width=11) + Select Operator [SEL_369] (rows=28798881 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_377] (rows=28798881 width=11) + Filter Operator [FIL_368] (rows=28798881 width=11) predicate:cr_returned_date_sk is not null - TableScan [TS_19] (rows=28798881 width=11) + TableScan [TS_0] (rows=28798881 width=11) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_item_sk","cr_return_quantity"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out index 199d8c8986..2c5aca3962 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out @@ -183,138 +183,138 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_209] - Limit [LIM_208] (rows=72 width=656) + Reducer 4 vectorized + File Output Operator [FS_211] + Limit [LIM_210] (rows=72 width=656) Number of rows:100 - Select Operator [SEL_207] (rows=72 width=656) + Select Operator [SEL_209] (rows=72 width=656) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] - Select Operator [SEL_205] (rows=72 width=656) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] + Select Operator [SEL_207] (rows=72 width=656) Output:["_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_204] (rows=72 width=353) + Top N Key Operator [TNK_206] (rows=72 width=353) keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), CAST( (_col3 / _col4) AS decimal(11,6)), CAST( (_col5 / _col6) AS decimal(11,6)),top n:100 - Group By Operator [GBY_203] (rows=72 width=353) + Group By Operator [GBY_205] (rows=72 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_43] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_45] PartitionCols:_col0 - Group By Operator [GBY_42] (rows=288 width=353) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col27)","count(_col27)","sum(_col26)","count(_col26)"],keys:_col36 - Merge Join Operator [MERGEJOIN_178] (rows=2912400 width=313) - Conds:RS_38._col24=RS_202._col0(Inner),Output:["_col3","_col26","_col27","_col36"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + Group By Operator [GBY_44] (rows=288 width=353) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","count(_col5)","sum(_col29)","count(_col29)","sum(_col28)","count(_col28)"],keys:_col1 + Merge Join Operator [MERGEJOIN_180] (rows=2912400 width=313) + Conds:RS_182._col0=RS_41._col24(Inner),Output:["_col1","_col5","_col28","_col29"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_182] PartitionCols:_col0 - Select Operator [SEL_201] (rows=72 width=101) + Select Operator [SEL_181] (rows=72 width=101) Output:["_col0","_col1"] - TableScan [TS_28] (rows=72 width=101) + TableScan [TS_0] (rows=72 width=101) default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_38] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col24 - Filter Operator [FIL_36] (rows=2912400 width=280) - predicate:(((_col29 and _col4) or (_col30 and _col5) or (_col31 and _col6)) and ((_col14 and _col15 and _col7) or (_col16 and _col17 and _col8) or (_col18 and _col19 and _col9))) - Merge Join Operator [MERGEJOIN_177] (rows=10355208 width=280) - Conds:RS_33._col1, _col2=RS_34._col9, _col14(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col14","_col15","_col16","_col17","_col18","_col19","_col24","_col26","_col27","_col29","_col30","_col31"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col9, _col14 - Select Operator [SEL_27] (rows=1056644 width=155) - Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col15","_col16","_col18","_col19","_col20"] - Merge Join Operator [MERGEJOIN_176] (rows=1056644 width=155) - Conds:RS_24._col1, _col13, _col14=RS_199._col0, _col1, _col2(Inner),Output:["_col0","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col18","_col19","_col20","_col21","_col22","_col23"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] + Select Operator [SEL_39] (rows=2912400 width=280) + Output:["_col3","_col24","_col26","_col27"] + Filter Operator [FIL_37] (rows=2912400 width=280) + predicate:(((_col29 and _col4) or (_col30 and _col5) or (_col31 and _col6)) and ((_col14 and _col15 and _col7) or (_col16 and _col17 and _col8) or (_col18 and _col19 and _col9))) + Merge Join Operator [MERGEJOIN_179] (rows=10355208 width=280) + Conds:RS_34._col1, _col2=RS_35._col9, _col14(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col14","_col15","_col16","_col17","_col18","_col19","_col24","_col26","_col27","_col29","_col30","_col31"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col9, _col14 + Merge Join Operator [MERGEJOIN_178] (rows=1056644 width=155) + Conds:RS_197._col0, _col1, _col2=RS_28._col1, _col13, _col14(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col15","_col16","_col18","_col19","_col20"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_197] (rows=265971 width=207) + Select Operator [SEL_195] (rows=265971 width=207) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_196] (rows=265971 width=183) + Filter Operator [FIL_194] (rows=265971 width=183) predicate:((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree')) - TableScan [TS_15] (rows=1861800 width=183) + TableScan [TS_8] (rows=1861800 width=183) default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] PartitionCols:_col1, _col13, _col14 - Merge Join Operator [MERGEJOIN_175] (rows=1056644 width=312) - Conds:RS_21._col3=RS_200._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col13","_col14"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_177] (rows=1056644 width=312) + Conds:RS_23._col3=RS_198._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col13","_col14"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_198] (rows=265971 width=183) + Select Operator [SEL_196] (rows=265971 width=183) Output:["_col0","_col1","_col2"] - Please refer to the previous Filter Operator [FIL_196] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_21] + Please refer to the previous Filter Operator [FIL_194] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_23] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_174] (rows=1056644 width=135) - Conds:RS_192._col2=RS_195._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_176] (rows=1056644 width=135) + Conds:RS_201._col2=RS_204._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] - PartitionCols:_col0 - Select Operator [SEL_194] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_193] (rows=3529412 width=187) - predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) - TableScan [TS_9] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_192] + SHUFFLE [RS_201] PartitionCols:_col2 - Select Operator [SEL_191] (rows=11975292 width=237) + Select Operator [SEL_200] (rows=11975292 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_190] (rows=11975292 width=237) + Filter Operator [FIL_199] (rows=11975292 width=237) predicate:(wr_refunded_addr_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_cdemo_sk is not null and wr_reason_sk is not null) - TableScan [TS_6] (rows=14398467 width=237) + TableScan [TS_11] (rows=14398467 width=237) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_173] (rows=51392014 width=35) - Conds:RS_189._col0=RS_181._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_181] - PartitionCols:_col0 - Select Operator [SEL_180] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_179] (rows=652 width=8) - predicate:(d_year = 1998) - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] - PartitionCols:_col0 - Select Operator [SEL_188] (rows=143931136 width=39) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_187] (rows=143931136 width=243) - predicate:(((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250)) and ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=243) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_186] - Group By Operator [GBY_185] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_184] - Group By Operator [GBY_183] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_182] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_180] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] + PartitionCols:_col0 + Select Operator [SEL_203] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_202] (rows=3529412 width=187) + predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States')) + TableScan [TS_14] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_175] (rows=51392014 width=35) + Conds:RS_193._col0=RS_185._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_185] + PartitionCols:_col0 + Select Operator [SEL_184] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_183] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_5] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] + PartitionCols:_col0 + Select Operator [SEL_192] (rows=143931136 width=39) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_191] (rows=143931136 width=243) + predicate:(((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250)) and ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_32_date_dim_d_date_sk_min) AND DynamicValue(RS_32_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_32_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_2] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_190] + Group By Operator [GBY_189] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_188] + Group By Operator [GBY_187] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_186] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_184] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out index a243540ca3..ca228e27e0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query86.q.out @@ -59,89 +59,89 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_84] - Limit [LIM_83] (rows=100 width=490) + Reducer 5 vectorized + File Output Operator [FS_85] + Limit [LIM_84] (rows=100 width=490) Number of rows:100 - Select Operator [SEL_82] (rows=3060 width=490) + Select Operator [SEL_83] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_81] - Select Operator [SEL_80] (rows=3060 width=490) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_82] + Select Operator [SEL_81] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_79] (rows=3060 width=302) + Top N Key Operator [TNK_80] (rows=3060 width=302) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_78] (rows=3060 width=302) + PTF Operator [PTF_79] (rows=3060 width=302) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_77] (rows=3060 width=302) + Select Operator [SEL_78] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_76] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_77] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_75] (rows=3060 width=302) + Select Operator [SEL_76] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_74] (rows=3060 width=302) + Group By Operator [GBY_75] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=88740 width=302) + Group By Operator [GBY_17] (rows=88740 width=302) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L - Select Operator [SEL_14] (rows=24992810 width=293) + Select Operator [SEL_15] (rows=24992810 width=293) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_60] (rows=24992810 width=293) - Conds:RS_11._col1=RS_73._col0(Inner),Output:["_col2","_col5","_col6"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + Merge Join Operator [MERGEJOIN_61] (rows=24992810 width=293) + Conds:RS_63._col0=RS_13._col1(Inner),Output:["_col1","_col2","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_72] (rows=462000 width=186) + Select Operator [SEL_62] (rows=462000 width=186) Output:["_col0","_col1","_col2"] - TableScan [TS_6] (rows=462000 width=186) + TableScan [TS_0] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_59] (rows=24992810 width=115) - Conds:RS_71._col0=RS_63._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_63] + Merge Join Operator [MERGEJOIN_60] (rows=24992810 width=115) + Conds:RS_74._col0=RS_66._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col0 - Select Operator [SEL_62] (rows=317 width=4) + Select Operator [SEL_65] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_61] (rows=317 width=8) + Filter Operator [FIL_64] (rows=317 width=8) predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_5] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_70] (rows=143966864 width=119) + Select Operator [SEL_73] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_69] (rows=143966864 width=119) + Filter Operator [FIL_72] (rows=143966864 width=119) predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_9_d1_d_date_sk_min) AND DynamicValue(RS_9_d1_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_9_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + TableScan [TS_2] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_net_paid"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_68] - Group By Operator [GBY_67] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_66] - Group By Operator [GBY_65] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_64] (rows=317 width=4) + Select Operator [SEL_67] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_62] + Please refer to the previous Select Operator [SEL_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out index a4127aad7f..b86c8ce2cb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query87.q.out @@ -55,234 +55,234 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Map 21 <- Reducer 15 (BROADCAST_EDGE) -Map 22 <- Reducer 19 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) -Reducer 17 <- Map 20 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 21 <- Reducer 18 (BROADCAST_EDGE) +Map 22 <- Reducer 20 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 15 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 20 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_268] - Group By Operator [GBY_267] (rows=1 width=8) + Reducer 8 vectorized + File Output Operator [FS_271] + Group By Operator [GBY_270] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_266] - Group By Operator [GBY_265] (rows=1 width=8) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_269] + Group By Operator [GBY_268] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_264] (rows=8062883 width=16) - Filter Operator [FIL_263] (rows=8062883 width=16) + Select Operator [SEL_267] (rows=8062883 width=16) + Filter Operator [FIL_266] (rows=8062883 width=16) predicate:((_col3 > 0L) and ((_col3 * 2L) = _col4)) - Select Operator [SEL_262] (rows=48377300 width=16) + Select Operator [SEL_265] (rows=48377300 width=16) Output:["_col3","_col4"] - Group By Operator [GBY_261] (rows=48377300 width=290) + Group By Operator [GBY_264] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_292] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] vectorized + Reduce Output Operator [RS_295] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_291] (rows=48377300 width=290) + Group By Operator [GBY_294] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_290] (rows=48377300 width=290) + Select Operator [SEL_293] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_289] (rows=24986582 width=290) + Select Operator [SEL_292] (rows=24986582 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_288] (rows=24986582 width=282) + Group By Operator [GBY_291] (rows=24986582 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_287] (rows=24986582 width=274) + Select Operator [SEL_290] (rows=24986582 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_286] (rows=24986582 width=274) + Group By Operator [GBY_289] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_77] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_80] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_76] (rows=24986582 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_189] (rows=24986582 width=274) - Conds:RS_72._col1=RS_245._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] + Group By Operator [GBY_79] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_192] (rows=24986582 width=274) + Conds:RS_229._col0=RS_76._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_242] (rows=80000000 width=184) + Select Operator [SEL_226] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - TableScan [TS_6] (rows=80000000 width=184) + TableScan [TS_0] (rows=80000000 width=184) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_72] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_76] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_188] (rows=24986582 width=97) - Conds:RS_285._col0=RS_229._col0(Inner),Output:["_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] + Merge Join Operator [MERGEJOIN_191] (rows=24986582 width=97) + Conds:RS_288._col0=RS_236._col0(Inner),Output:["_col1","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_236] PartitionCols:_col0 - Select Operator [SEL_224] (rows=317 width=98) + Select Operator [SEL_231] (rows=317 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_223] (rows=317 width=102) + Filter Operator [FIL_230] (rows=317 width=102) predicate:d_month_seq BETWEEN 1212 AND 1223 - TableScan [TS_3] (rows=73049 width=102) + TableScan [TS_5] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] + SHUFFLE [RS_288] PartitionCols:_col0 - Select Operator [SEL_284] (rows=143930993 width=7) + Select Operator [SEL_287] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_283] (rows=143930993 width=7) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_61] (rows=144002668 width=7) + Filter Operator [FIL_286] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_65] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_282] - Group By Operator [GBY_281] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_285] + Group By Operator [GBY_284] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] - Group By Operator [GBY_233] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_243] + Group By Operator [GBY_240] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_230] (rows=317 width=4) + Select Operator [SEL_237] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_224] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_260] + Please refer to the previous Select Operator [SEL_231] + <-Reducer 5 [CONTAINS] vectorized + Reduce Output Operator [RS_263] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_259] (rows=48377300 width=290) + Group By Operator [GBY_262] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_258] (rows=48377300 width=290) + Select Operator [SEL_261] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_257] (rows=23390718 width=290) + Select Operator [SEL_260] (rows=23390718 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_256] (rows=23390718 width=282) + Group By Operator [GBY_259] (rows=23390718 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_255] (rows=23390718 width=290) + Select Operator [SEL_258] (rows=23390718 width=290) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_254] (rows=23390718 width=290) + Filter Operator [FIL_257] (rows=23390718 width=290) predicate:((_col3 > 0L) and ((_col3 * 2L) = _col4)) - Group By Operator [GBY_253] (rows=140344308 width=290) + Group By Operator [GBY_256] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_280] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] vectorized + Reduce Output Operator [RS_283] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_279] (rows=140344308 width=290) + Group By Operator [GBY_282] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_278] (rows=140344308 width=290) + Select Operator [SEL_281] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_277] (rows=49146883 width=290) + Select Operator [SEL_280] (rows=49146883 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_276] (rows=49146883 width=282) + Group By Operator [GBY_279] (rows=49146883 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_275] (rows=49146883 width=274) + Select Operator [SEL_278] (rows=49146883 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_274] (rows=49146883 width=274) + Group By Operator [GBY_277] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_39] (rows=49146883 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_187] (rows=49146883 width=274) - Conds:RS_35._col1=RS_244._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] + Group By Operator [GBY_41] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_190] (rows=49146883 width=274) + Conds:RS_228._col0=RS_38._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_242] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_35] + Please refer to the previous Select Operator [SEL_226] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_186] (rows=49146883 width=97) - Conds:RS_273._col0=RS_227._col0(Inner),Output:["_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_227] + Merge Join Operator [MERGEJOIN_189] (rows=49146883 width=97) + Conds:RS_276._col0=RS_234._col0(Inner),Output:["_col1","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_234] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_224] + Please refer to the previous Select Operator [SEL_231] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_273] + SHUFFLE [RS_276] PartitionCols:_col0 - Select Operator [SEL_272] (rows=285117831 width=7) + Select Operator [SEL_275] (rows=285117831 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_271] (rows=285117831 width=7) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_24] (rows=287989836 width=7) + Filter Operator [FIL_274] (rows=285117831 width=7) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_27] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_270] - Group By Operator [GBY_269] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_273] + Group By Operator [GBY_272] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - Group By Operator [GBY_232] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_242] + Group By Operator [GBY_239] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_228] (rows=317 width=4) + Select Operator [SEL_235] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_224] - <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_252] + Please refer to the previous Select Operator [SEL_231] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_255] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_251] (rows=140344308 width=290) + Group By Operator [GBY_254] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_250] (rows=140344308 width=290) + Select Operator [SEL_253] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_249] (rows=91197425 width=290) + Select Operator [SEL_252] (rows=91197425 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_248] (rows=91197425 width=282) + Group By Operator [GBY_251] (rows=91197425 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_247] (rows=91197425 width=274) + Select Operator [SEL_250] (rows=91197425 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_246] (rows=91197425 width=274) + Group By Operator [GBY_249] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_16] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=91197425 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_185] (rows=91197425 width=274) - Conds:RS_11._col1=RS_243._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] + Group By Operator [GBY_16] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_188] (rows=91197425 width=274) + Conds:RS_227._col0=RS_13._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_242] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] + Please refer to the previous Select Operator [SEL_226] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_13] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=91197425 width=96) - Conds:RS_241._col0=RS_225._col0(Inner),Output:["_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + Merge Join Operator [MERGEJOIN_187] (rows=91197425 width=96) + Conds:RS_248._col0=RS_232._col0(Inner),Output:["_col1","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_232] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_224] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] + Please refer to the previous Select Operator [SEL_231] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] PartitionCols:_col0 - Select Operator [SEL_240] (rows=525327388 width=7) + Select Operator [SEL_247] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_239] (rows=525327388 width=7) + Filter Operator [FIL_246] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=7) + TableScan [TS_2] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Group By Operator [GBY_237] (rows=1 width=12) + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_245] + Group By Operator [GBY_244] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - Group By Operator [GBY_231] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_241] + Group By Operator [GBY_238] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_226] (rows=317 width=4) + Select Operator [SEL_233] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_224] + Please refer to the previous Select Operator [SEL_231] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out index 0d0c39a05a..fc4ddb06a1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query91.q.out @@ -78,119 +78,119 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_170] - Select Operator [SEL_169] (rows=40890 width=406) + Reducer 7 vectorized + File Output Operator [FS_171] + Select Operator [SEL_170] (rows=40890 width=406) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_168] - Select Operator [SEL_167] (rows=40890 width=518) + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_169] + Select Operator [SEL_168] (rows=40890 width=518) Output:["_col0","_col1","_col2","_col4"] - Group By Operator [GBY_166] (rows=40890 width=585) + Group By Operator [GBY_167] (rows=40890 width=585) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_41] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_40] (rows=40890 width=585) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col5, _col6, _col15, _col16, _col17 - Merge Join Operator [MERGEJOIN_145] (rows=231957 width=473) - Conds:RS_36._col10=RS_165._col0(Inner),Output:["_col5","_col6","_col11","_col15","_col16","_col17"] + Group By Operator [GBY_41] (rows=40890 width=585) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col6, _col7, _col15, _col16, _col17 + Merge Join Operator [MERGEJOIN_146] (rows=231957 width=473) + Conds:RS_37._col10=RS_166._col0(Inner),Output:["_col6","_col7","_col11","_col15","_col16","_col17"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] + SHUFFLE [RS_166] PartitionCols:_col0 - Select Operator [SEL_164] (rows=60 width=298) + Select Operator [SEL_165] (rows=60 width=298) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_22] (rows=60 width=298) + TableScan [TS_26] (rows=60 width=298) default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_36] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_37] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_144] (rows=231957 width=179) - Conds:RS_33._col2=RS_163._col0(Inner),Output:["_col5","_col6","_col10","_col11"] + Merge Join Operator [MERGEJOIN_145] (rows=231957 width=179) + Conds:RS_34._col3=RS_164._col0(Inner),Output:["_col6","_col7","_col10","_col11"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + SHUFFLE [RS_164] PartitionCols:_col0 - Select Operator [SEL_162] (rows=3600 width=4) + Select Operator [SEL_163] (rows=3600 width=4) Output:["_col0"] - Filter Operator [FIL_161] (rows=3600 width=96) + Filter Operator [FIL_162] (rows=3600 width=96) predicate:(hd_buy_potential like '0-500%') - TableScan [TS_19] (rows=7200 width=96) + TableScan [TS_23] (rows=7200 width=96) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_143] (rows=463913 width=181) - Conds:RS_30._col0=RS_31._col1(Inner),Output:["_col2","_col5","_col6","_col10","_col11"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_144] (rows=463913 width=181) + Conds:RS_31._col1=RS_32._col1(Inner),Output:["_col3","_col6","_col7","_col10","_col11"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_31] + SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_142] (rows=657590 width=19) - Conds:RS_157._col0=RS_160._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_143] (rows=657590 width=19) + Conds:RS_158._col0=RS_161._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_157] + SHUFFLE [RS_158] PartitionCols:_col0 - Select Operator [SEL_156] (rows=27658583 width=121) + Select Operator [SEL_157] (rows=27658583 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_155] (rows=27658583 width=121) + Filter Operator [FIL_156] (rows=27658583 width=121) predicate:(cr_call_center_sk is not null and cr_returning_customer_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_9] (rows=28798881 width=121) + TableScan [TS_13] (rows=28798881 width=121) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + SHUFFLE [RS_161] PartitionCols:_col0 - Select Operator [SEL_159] (rows=50 width=4) + Select Operator [SEL_160] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_158] (rows=50 width=12) + Filter Operator [FIL_159] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 11)) - TableScan [TS_12] (rows=73049 width=12) + TableScan [TS_16] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_141] (rows=213205 width=183) - Conds:RS_27._col3=RS_154._col0(Inner),Output:["_col0","_col2","_col5","_col6"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_142] (rows=213205 width=183) + Conds:RS_149._col0=RS_29._col3(Inner),Output:["_col1","_col3","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_153] (rows=8000000 width=4) + Select Operator [SEL_148] (rows=8000000 width=4) Output:["_col0"] - Filter Operator [FIL_152] (rows=8000000 width=112) + Filter Operator [FIL_147] (rows=8000000 width=112) predicate:(ca_gmt_offset = -7) - TableScan [TS_6] (rows=40000000 width=112) + TableScan [TS_0] (rows=40000000 width=112) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_140] (rows=1066023 width=187) - Conds:RS_148._col1=RS_151._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] - PartitionCols:_col1 - Select Operator [SEL_147] (rows=74500295 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_146] (rows=74500295 width=15) - predicate:(c_current_hdemo_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=15) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] + Merge Join Operator [MERGEJOIN_141] (rows=1066023 width=187) + Conds:RS_152._col1=RS_155._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] PartitionCols:_col0 - Select Operator [SEL_150] (rows=26269 width=183) + Select Operator [SEL_154] (rows=26269 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_149] (rows=26269 width=183) + Filter Operator [FIL_153] (rows=26269 width=183) predicate:((cd_marital_status) IN ('M', 'W') and (cd_education_status) IN ('Unknown', 'Advanced Degree') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree'))) - TableScan [TS_3] (rows=1861800 width=183) + TableScan [TS_6] (rows=1861800 width=183) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_152] + PartitionCols:_col1 + Select Operator [SEL_151] (rows=74500295 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_150] (rows=74500295 width=15) + predicate:(c_current_hdemo_sk is not null and c_current_cdemo_sk is not null and c_current_addr_sk is not null) + TableScan [TS_3] (rows=80000000 width=15) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out index 5374a858c8..04e545f465 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query93.q.out @@ -43,79 +43,79 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Reducer 6 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_81] - Limit [LIM_80] (rows=100 width=112) + Reducer 4 vectorized + File Output Operator [FS_82] + Limit [LIM_81] (rows=100 width=112) Number of rows:100 - Select Operator [SEL_79] (rows=38308 width=108) + Select Operator [SEL_80] (rows=38308 width=108) Output:["_col0","_col1"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_78] - Top N Key Operator [TNK_77] (rows=38308 width=112) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] + Top N Key Operator [TNK_78] (rows=38308 width=112) keys:_col1, _col0,top n:100 - Group By Operator [GBY_76] (rows=38308 width=112) + Group By Operator [GBY_77] (rows=38308 width=112) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=38308 width=112) + Group By Operator [GBY_17] (rows=38308 width=112) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_14] (rows=15586502 width=3) + Select Operator [SEL_15] (rows=15586502 width=3) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_64] (rows=15586502 width=3) - Conds:RS_11._col0, _col2=RS_75._col0, _col2(Inner),Output:["_col3","_col6","_col8","_col9"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_11] + Merge Join Operator [MERGEJOIN_65] (rows=15586502 width=3) + Conds:RS_76._col0, _col2=RS_13._col0, _col2(Inner),Output:["_col1","_col3","_col4","_col8"] + <-Reducer 6 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_13] PartitionCols:_col0, _col2 - Merge Join Operator [MERGEJOIN_63] (rows=1522298 width=8) - Conds:RS_67._col1=RS_70._col0(Inner),Output:["_col0","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_67] + Merge Join Operator [MERGEJOIN_64] (rows=1522298 width=8) + Conds:RS_68._col1=RS_71._col0(Inner),Output:["_col0","_col2","_col3"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_68] PartitionCols:_col1 - Select Operator [SEL_66] (rows=55574563 width=15) + Select Operator [SEL_67] (rows=55574563 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_65] (rows=55574563 width=15) + Filter Operator [FIL_66] (rows=55574563 width=15) predicate:sr_reason_sk is not null - TableScan [TS_0] (rows=57591150 width=15) + TableScan [TS_2] (rows=57591150 width=15) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_70] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_71] PartitionCols:_col0 - Select Operator [SEL_69] (rows=1 width=4) + Select Operator [SEL_70] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_68] (rows=1 width=101) + Filter Operator [FIL_69] (rows=1 width=101) predicate:(r_reason_desc = 'Did not like the warranty') - TableScan [TS_3] (rows=72 width=101) + TableScan [TS_5] (rows=72 width=101) default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_75] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] PartitionCols:_col0, _col2 - Select Operator [SEL_74] (rows=575995635 width=122) + Select Operator [SEL_75] (rows=575995635 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_73] (rows=575995635 width=122) - predicate:(ss_ticket_number BETWEEN DynamicValue(RS_11_store_returns_sr_ticket_number_min) AND DynamicValue(RS_11_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_11_store_returns_sr_ticket_number_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=122) + Filter Operator [FIL_74] (rows=575995635 width=122) + predicate:(ss_ticket_number BETWEEN DynamicValue(RS_13_store_returns_sr_ticket_number_min) AND DynamicValue(RS_13_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_13_store_returns_sr_ticket_number_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_72] - Group By Operator [GBY_71] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_73] + Group By Operator [GBY_72] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_60] - Group By Operator [GBY_59] (rows=1 width=12) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_41] + Group By Operator [GBY_40] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_58] (rows=1522298 width=8) + Select Operator [SEL_39] (rows=1522298 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_63] + Please refer to the previous Merge Join Operator [MERGEJOIN_64] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out index fabac20266..2c4dbf6881 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query94.q.out @@ -69,147 +69,149 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 14 <- Reducer 9 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Map 10 <- Reducer 9 (BROADCAST_EDGE) +Map 14 <- Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_158] - Group By Operator [GBY_157] (rows=1 width=232) + Reducer 7 vectorized + File Output Operator [FS_159] + Group By Operator [GBY_158] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_156] - Group By Operator [GBY_155] (rows=1 width=232) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_157] + Group By Operator [GBY_156] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_154] (rows=5022875 width=228) + Group By Operator [GBY_155] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_69] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_70] PartitionCols:_col0 - Group By Operator [GBY_68] (rows=5022875 width=228) + Group By Operator [GBY_69] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_41] (rows=5022875 width=229) + Select Operator [SEL_42] (rows=5022875 width=229) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_40] (rows=5022875 width=229) + Filter Operator [FIL_41] (rows=5022875 width=229) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_125] (rows=10045750 width=229) - Conds:RS_37._col4=RS_153._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] + Merge Join Operator [MERGEJOIN_126] (rows=10045750 width=229) + Conds:RS_38._col4=RS_154._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_154] PartitionCols:_col1 - Select Operator [SEL_152] (rows=8007986 width=8) + Select Operator [SEL_153] (rows=8007986 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_151] (rows=8007986 width=4) + Group By Operator [GBY_152] (rows=8007986 width=4) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + SHUFFLE [RS_151] PartitionCols:_col0 - Group By Operator [GBY_149] (rows=14398467 width=4) + Group By Operator [GBY_150] (rows=14398467 width=4) Output:["_col0"],keys:wr_order_number - TableScan [TS_25] (rows=14398467 width=4) + TableScan [TS_26] (rows=14398467 width=4) default@web_returns,wr1,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_37] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col4 - Select Operator [SEL_36] (rows=5022875 width=231) + Select Operator [SEL_37] (rows=5022875 width=231) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_124] (rows=5022875 width=235) - Conds:RS_33._col4=RS_148._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} - <-Reducer 4 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_33] + Merge Join Operator [MERGEJOIN_125] (rows=5022875 width=235) + Conds:RS_34._col4=RS_149._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_34] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_123] (rows=5022875 width=231) - Conds:RS_18._col2=RS_142._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] - PartitionCols:_col0 - Select Operator [SEL_141] (rows=12 width=91) - Output:["_col0"] - Filter Operator [FIL_140] (rows=12 width=92) - predicate:(web_company_name = 'pri') - TableScan [TS_9] (rows=84 width=92) - default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_122] (rows=15673790 width=235) - Conds:RS_15._col1=RS_128._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - PartitionCols:_col0 - Select Operator [SEL_127] (rows=784314 width=90) - Output:["_col0"] - Filter Operator [FIL_126] (rows=784314 width=90) - predicate:(ca_state = 'TX') - TableScan [TS_6] (rows=40000000 width=90) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=15987241 width=239) - Conds:RS_136._col0=RS_139._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] - PartitionCols:_col0 - Select Operator [SEL_135] (rows=143895019 width=243) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_134] (rows=143895019 width=243) - predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=243) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_133] - Group By Operator [GBY_132] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_129] (rows=784314 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_127] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - PartitionCols:_col0 - Select Operator [SEL_138] (rows=8116 width=98) - Output:["_col0"] - Filter Operator [FIL_137] (rows=8116 width=98) - predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' - TableScan [TS_3] (rows=73049 width=98) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + Select Operator [SEL_22] (rows=5022875 width=231) + Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_124] (rows=5022875 width=231) + Conds:RS_19._col4=RS_143._col0(Inner),Output:["_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + PartitionCols:_col0 + Select Operator [SEL_142] (rows=12 width=91) + Output:["_col0"] + Filter Operator [FIL_141] (rows=12 width=92) + predicate:(web_company_name = 'pri') + TableScan [TS_13] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_123] (rows=15673790 width=235) + Conds:RS_129._col0=RS_17._col1(Inner),Output:["_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_127] (rows=784314 width=90) + predicate:(ca_state = 'TX') + TableScan [TS_0] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_122] (rows=15987241 width=239) + Conds:RS_137._col0=RS_140._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_137] + PartitionCols:_col0 + Select Operator [SEL_136] (rows=143895019 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_135] (rows=143895019 width=243) + predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) + TableScan [TS_3] (rows=144002668 width=243) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_130] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_128] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + PartitionCols:_col0 + Select Operator [SEL_139] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_138] (rows=8116 width=98) + predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_149] PartitionCols:_col0 - Group By Operator [GBY_147] (rows=143966743 width=7) + Group By Operator [GBY_148] (rows=143966743 width=7) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_146] (rows=143966743 width=7) + Select Operator [SEL_147] (rows=143966743 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_145] (rows=143966743 width=7) - predicate:(ws_warehouse_sk is not null and ws_order_number BETWEEN DynamicValue(RS_33_ws1_ws_order_number_min) AND DynamicValue(RS_33_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_33_ws1_ws_order_number_bloom_filter))) - TableScan [TS_22] (rows=144002668 width=7) + Filter Operator [FIL_146] (rows=143966743 width=7) + predicate:(ws_warehouse_sk is not null and ws_order_number BETWEEN DynamicValue(RS_34_ws1_ws_order_number_min) AND DynamicValue(RS_34_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_34_ws1_ws_order_number_bloom_filter))) + TableScan [TS_23] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=5022875 width=8) + Select Operator [SEL_110] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_123] + Please refer to the previous Select Operator [SEL_22] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out index 39d35ec330..1d21610812 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out @@ -75,219 +75,219 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 15 <- Reducer 10 (BROADCAST_EDGE) -Map 18 <- Reducer 10 (BROADCAST_EDGE) -Map 19 <- Reducer 9 (BROADCAST_EDGE) -Map 23 <- Reducer 9 (BROADCAST_EDGE) -Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 21 <- Map 24 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 22 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE) +Map 10 <- Reducer 19 (BROADCAST_EDGE) +Map 15 <- Reducer 19 (BROADCAST_EDGE) +Map 21 <- Reducer 20 (BROADCAST_EDGE) +Map 8 <- Reducer 14 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_276] - Group By Operator [GBY_275] (rows=1 width=232) + Reducer 7 vectorized + File Output Operator [FS_279] + Group By Operator [GBY_278] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_274] - Group By Operator [GBY_273] (rows=1 width=232) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_277] + Group By Operator [GBY_276] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_272] (rows=5022875 width=228) + Group By Operator [GBY_275] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_108] PartitionCols:_col0 - Group By Operator [GBY_104] (rows=5022875 width=228) - Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_231] (rows=5022875 width=227) - Conds:RS_55._col3=RS_271._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_230] (rows=5022875 width=227) - Conds:RS_52._col3=RS_259._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_228] (rows=5022875 width=227) - Conds:RS_49._col2=RS_248._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + Group By Operator [GBY_107] (rows=5022875 width=228) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col7)","sum(_col8)"],keys:_col6 + Merge Join Operator [MERGEJOIN_234] (rows=5022875 width=227) + Conds:RS_274._col0=RS_59._col5(Inner),Output:["_col6","_col7","_col8"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_233] (rows=5022875 width=227) + Conds:RS_262._col0=RS_55._col4(Inner),Output:["_col5","_col6","_col7"] + <-Reducer 18 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_55] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_232] (rows=5022875 width=227) + Conds:RS_50._col3=RS_251._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_247] (rows=12 width=4) + Select Operator [SEL_250] (rows=12 width=4) Output:["_col0"] - Filter Operator [FIL_246] (rows=12 width=92) + Filter Operator [FIL_249] (rows=12 width=92) predicate:(web_company_name = 'pri') - TableScan [TS_9] (rows=84 width=92) + TableScan [TS_44] (rows=84 width=92) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_227] (rows=15673790 width=231) - Conds:RS_46._col1=RS_234._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_231] (rows=15673790 width=231) + Conds:RS_237._col0=RS_48._col1(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_237] PartitionCols:_col0 - Select Operator [SEL_233] (rows=784314 width=4) + Select Operator [SEL_236] (rows=784314 width=4) Output:["_col0"] - Filter Operator [FIL_232] (rows=784314 width=90) + Filter Operator [FIL_235] (rows=784314 width=90) predicate:(ca_state = 'TX') - TableScan [TS_6] (rows=40000000 width=90) + TableScan [TS_31] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_46] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_48] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_226] (rows=15987241 width=235) - Conds:RS_242._col0=RS_245._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + Merge Join Operator [MERGEJOIN_230] (rows=15987241 width=235) + Conds:RS_245._col0=RS_248._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] PartitionCols:_col0 - Select Operator [SEL_241] (rows=143895019 width=239) + Select Operator [SEL_244] (rows=143895019 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_240] (rows=143895019 width=239) + Filter Operator [FIL_243] (rows=143895019 width=239) predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_47_customer_address_ca_address_sk_min) AND DynamicValue(RS_47_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_47_customer_address_ca_address_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=239) + TableScan [TS_34] (rows=144002668 width=239) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Group By Operator [GBY_238] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_242] + Group By Operator [GBY_241] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] - Group By Operator [GBY_236] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_240] + Group By Operator [GBY_239] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_235] (rows=784314 width=4) + Select Operator [SEL_238] (rows=784314 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_233] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] + Please refer to the previous Select Operator [SEL_236] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] PartitionCols:_col0 - Select Operator [SEL_244] (rows=8116 width=98) + Select Operator [SEL_247] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_243] (rows=8116 width=98) + Filter Operator [FIL_246] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_37] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_262] PartitionCols:_col0 - Group By Operator [GBY_258] (rows=14686712 width=4) + Group By Operator [GBY_261] (rows=14686712 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_22] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_28] PartitionCols:_col0 - Group By Operator [GBY_21] (rows=144002668 width=4) + Group By Operator [GBY_27] (rows=144002668 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_20] (rows=1411940834 width=11) + Select Operator [SEL_26] (rows=1411940834 width=11) Output:["_col1"] - Filter Operator [FIL_19] (rows=1411940834 width=11) + Filter Operator [FIL_25] (rows=1411940834 width=11) predicate:(_col0 <> _col2) Merge Join Operator [MERGEJOIN_229] (rows=1411940834 width=11) - Conds:RS_254._col1=RS_257._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + Conds:RS_257._col1=RS_260._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] PartitionCols:_col1 - Select Operator [SEL_253] (rows=144002668 width=7) + Select Operator [SEL_256] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_252] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_52_ws1_ws_order_number_min) AND DynamicValue(RS_52_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_52_ws1_ws_order_number_bloom_filter))) - TableScan [TS_12] (rows=144002668 width=7) + Filter Operator [FIL_255] (rows=144002668 width=7) + predicate:(ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter))) + TableScan [TS_18] (rows=144002668 width=7) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_250] - Group By Operator [GBY_249] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_253] + Group By Operator [GBY_252] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_181] (rows=5022875 width=8) + Select Operator [SEL_128] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_228] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] + Please refer to the previous Merge Join Operator [MERGEJOIN_232] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_260] PartitionCols:_col1 - Select Operator [SEL_256] (rows=144002668 width=7) + Select Operator [SEL_259] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_255] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_52_ws1_ws_order_number_min) AND DynamicValue(RS_52_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_52_ws1_ws_order_number_bloom_filter))) - TableScan [TS_14] (rows=144002668 width=7) + Filter Operator [FIL_258] (rows=144002668 width=7) + predicate:(ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter))) + TableScan [TS_20] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_251] - Please refer to the previous Group By Operator [GBY_249] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_271] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_254] + Please refer to the previous Group By Operator [GBY_252] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_274] PartitionCols:_col0 - Group By Operator [GBY_270] (rows=8007986 width=4) + Group By Operator [GBY_273] (rows=8007986 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_15] PartitionCols:_col0 - Group By Operator [GBY_39] (rows=14398467 width=4) + Group By Operator [GBY_14] (rows=14398467 width=4) Output:["_col0"],keys:_col14 - Merge Join Operator [MERGEJOIN_225] (rows=1384229738 width=4) - Conds:RS_35._col0=RS_269.wr_order_number(Inner),Output:["_col14"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_269] + Merge Join Operator [MERGEJOIN_228] (rows=1384229738 width=4) + Conds:RS_10._col0=RS_272.wr_order_number(Inner),Output:["_col14"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_272] PartitionCols:wr_order_number - TableScan [TS_34] (rows=14398467 width=4) + TableScan [TS_9] (rows=14398467 width=4) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_35] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_33] (rows=1411940834 width=4) + Select Operator [SEL_8] (rows=1411940834 width=4) Output:["_col0"] - Filter Operator [FIL_32] (rows=1411940834 width=11) + Filter Operator [FIL_7] (rows=1411940834 width=11) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_224] (rows=1411940834 width=11) - Conds:RS_265._col1=RS_268._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + Merge Join Operator [MERGEJOIN_227] (rows=1411940834 width=11) + Conds:RS_268._col1=RS_271._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_268] PartitionCols:_col1 - Select Operator [SEL_264] (rows=144002668 width=7) + Select Operator [SEL_267] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_263] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter))) - TableScan [TS_25] (rows=144002668 width=7) + Filter Operator [FIL_266] (rows=144002668 width=7) + predicate:(ws_order_number BETWEEN DynamicValue(RS_59_ws1_ws_order_number_min) AND DynamicValue(RS_59_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_59_ws1_ws_order_number_bloom_filter))) + TableScan [TS_0] (rows=144002668 width=7) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_264] + Group By Operator [GBY_263] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_202] - Group By Operator [GBY_201] (rows=1 width=12) + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_200] (rows=5022875 width=8) + Select Operator [SEL_123] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_230] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_268] + Please refer to the previous Merge Join Operator [MERGEJOIN_233] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_271] PartitionCols:_col1 - Select Operator [SEL_267] (rows=144002668 width=7) + Select Operator [SEL_270] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_266] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter))) - TableScan [TS_27] (rows=144002668 width=7) + Filter Operator [FIL_269] (rows=144002668 width=7) + predicate:(ws_order_number BETWEEN DynamicValue(RS_59_ws1_ws_order_number_min) AND DynamicValue(RS_59_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_59_ws1_ws_order_number_bloom_filter))) + TableScan [TS_2] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_262] - Please refer to the previous Group By Operator [GBY_260] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_265] + Please refer to the previous Group By Operator [GBY_263] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out index a03eb1a466..2ab2cd7170 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query98.q.out @@ -71,83 +71,83 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_79] - Select Operator [SEL_78] (rows=138600 width=701) + Reducer 5 vectorized + File Output Operator [FS_80] + Select Operator [SEL_79] (rows=138600 width=701) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_77] - Select Operator [SEL_76] (rows=138600 width=801) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] + Select Operator [SEL_77] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_75] (rows=138600 width=689) + PTF Operator [PTF_76] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_74] (rows=138600 width=689) + Select Operator [SEL_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col1 - Group By Operator [GBY_72] (rows=138600 width=689) + Group By Operator [GBY_73] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=138600 width=689) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_57] (rows=18334631 width=577) - Conds:RS_12._col1=RS_71._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] + Group By Operator [GBY_17] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)"],keys:_col5, _col4, _col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_58] (rows=18334631 width=577) + Conds:RS_61._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_70] (rows=138600 width=581) + Select Operator [SEL_60] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_69] (rows=138600 width=581) + Filter Operator [FIL_59] (rows=138600 width=581) predicate:(i_category) IN ('Jewelry', 'Sports', 'Books') - TableScan [TS_6] (rows=462000 width=581) + TableScan [TS_0] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_56] (rows=61115434 width=70) - Conds:RS_68._col0=RS_60._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_60] + Merge Join Operator [MERGEJOIN_57] (rows=61115434 width=70) + Conds:RS_72._col0=RS_64._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_59] (rows=8116 width=4) + Select Operator [SEL_63] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_58] (rows=8116 width=98) + Filter Operator [FIL_62] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_68] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_67] (rows=550076554 width=114) + Select Operator [SEL_71] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_66] (rows=550076554 width=114) + Filter Operator [FIL_70] (rows=550076554 width=114) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=114) + TableScan [TS_3] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_65] - Group By Operator [GBY_64] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_63] - Group By Operator [GBY_62] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_67] + Group By Operator [GBY_66] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_61] (rows=8116 width=4) + Select Operator [SEL_65] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_59] + Please refer to the previous Select Operator [SEL_63] diff --git a/ql/src/test/results/clientpositive/perf/tez/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out index ea754e01f9..52c2ab49f9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query1.q.out @@ -59,127 +59,127 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_166] - Limit [LIM_165] (rows=100 width=100) + Reducer 4 vectorized + File Output Operator [FS_167] + Limit [LIM_166] (rows=100 width=100) Number of rows:100 - Select Operator [SEL_164] (rows=816091 width=100) + Select Operator [SEL_165] (rows=816091 width=100) Output:["_col0"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=816091 width=100) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_53] + Select Operator [SEL_52] (rows=816091 width=100) Output:["_col0"] - Top N Key Operator [TNK_79] (rows=816091 width=324) - keys:_col5,top n:100 - Filter Operator [FIL_50] (rows=816091 width=324) - predicate:(_col3 > _col6) - Merge Join Operator [MERGEJOIN_137] (rows=2448274 width=324) - Conds:RS_47._col2=RS_163._col1(Inner),Output:["_col3","_col5","_col6"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + Top N Key Operator [TNK_80] (rows=816091 width=324) + keys:_col1,top n:100 + Filter Operator [FIL_51] (rows=816091 width=324) + predicate:(_col5 > _col6) + Merge Join Operator [MERGEJOIN_138] (rows=2448274 width=324) + Conds:RS_48._col4=RS_164._col1(Inner),Output:["_col1","_col5","_col6"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] PartitionCols:_col1 - Select Operator [SEL_162] (rows=31 width=115) + Select Operator [SEL_163] (rows=31 width=115) Output:["_col0","_col1"] - Filter Operator [FIL_161] (rows=31 width=123) + Filter Operator [FIL_162] (rows=31 width=123) predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_160] (rows=31 width=123) + Group By Operator [GBY_161] (rows=31 width=123) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_159] (rows=14291868 width=119) + Select Operator [SEL_160] (rows=14291868 width=119) Output:["_col1","_col2"] - Group By Operator [GBY_158] (rows=14291868 width=119) + Group By Operator [GBY_159] (rows=14291868 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_32] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_31] (rows=17467258 width=119) + Group By Operator [GBY_35] (rows=17467258 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_136] (rows=17467258 width=107) - Conds:RS_146._col0=RS_150._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + Merge Join Operator [MERGEJOIN_137] (rows=17467258 width=107) + Conds:RS_150._col0=RS_154._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_148] (rows=652 width=4) + Select Operator [SEL_152] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_147] (rows=652 width=8) + Filter Operator [FIL_151] (rows=652 width=8) predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_9] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_144] (rows=53634860 width=119) + Select Operator [SEL_148] (rows=53634860 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_142] (rows=53634860 width=119) + Filter Operator [FIL_146] (rows=53634860 width=119) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_3] (rows=57591150 width=119) + TableScan [TS_6] (rows=57591150 width=119) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_135] (rows=2369298 width=213) - Conds:RS_44._col1=RS_157._col0(Inner),Output:["_col2","_col3","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_157] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_136] (rows=2369298 width=213) + Conds:RS_141._col0=RS_46._col1(Inner),Output:["_col1","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_156] (rows=80000000 width=104) + Select Operator [SEL_140] (rows=80000000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=80000000 width=104) + Filter Operator [FIL_139] (rows=80000000 width=104) predicate:c_customer_sk is not null - TableScan [TS_18] (rows=80000000 width=104) + TableScan [TS_0] (rows=80000000 width=104) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_134] (rows=2369298 width=114) - Conds:RS_140._col0=RS_154._col1(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] + Merge Join Operator [MERGEJOIN_135] (rows=2369298 width=114) + Conds:RS_144._col0=RS_158._col1(Inner),Output:["_col1","_col2","_col3"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] PartitionCols:_col0 - Select Operator [SEL_139] (rows=35 width=4) + Select Operator [SEL_143] (rows=35 width=4) Output:["_col0"] - Filter Operator [FIL_138] (rows=35 width=90) + Filter Operator [FIL_142] (rows=35 width=90) predicate:((s_state = 'NM') and s_store_sk is not null) - TableScan [TS_0] (rows=1704 width=90) + TableScan [TS_3] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_158] PartitionCols:_col1 - Select Operator [SEL_153] (rows=14291868 width=119) + Select Operator [SEL_157] (rows=14291868 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_152] (rows=14291868 width=119) + Filter Operator [FIL_156] (rows=14291868 width=119) predicate:_col2 is not null - Group By Operator [GBY_151] (rows=14291868 width=119) + Group By Operator [GBY_155] (rows=14291868 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_14] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0, _col1 - Group By Operator [GBY_13] (rows=16855704 width=119) + Group By Operator [GBY_16] (rows=16855704 width=119) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_133] (rows=16855704 width=107) - Conds:RS_145._col0=RS_149._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + Merge Join Operator [MERGEJOIN_134] (rows=16855704 width=107) + Conds:RS_149._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_148] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + Please refer to the previous Select Operator [SEL_152] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_143] (rows=51757026 width=119) + Select Operator [SEL_147] (rows=51757026 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_141] (rows=51757026 width=119) + Filter Operator [FIL_145] (rows=51757026 width=119) predicate:(sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out index 02ab587dc1..f21d8c13b6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -159,273 +159,273 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 23 (BROADCAST_EDGE) -Map 13 <- Reducer 25 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 9 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 25 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 21 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 16 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_355] - Limit [LIM_354] (rows=100 width=85) + Reducer 5 vectorized + File Output Operator [FS_358] + Limit [LIM_357] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_353] (rows=12248093 width=85) + Select Operator [SEL_356] (rows=12248093 width=85) Output:["_col0"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_93] - Select Operator [SEL_92] (rows=12248093 width=85) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_98] + Select Operator [SEL_97] (rows=12248093 width=85) Output:["_col0"] - Top N Key Operator [TNK_158] (rows=12248093 width=537) - keys:_col8,top n:100 - Filter Operator [FIL_91] (rows=12248093 width=537) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_288] (rows=24496186 width=537) - Conds:RS_88._col2=RS_352._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + Top N Key Operator [TNK_163] (rows=12248093 width=537) + keys:_col1,top n:100 + Filter Operator [FIL_96] (rows=12248093 width=537) + predicate:CASE WHEN (_col6 is not null) THEN (CASE WHEN (_col9) THEN (((_col4 / _col8) > (_col2 / _col6))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=537) + Conds:RS_327._col0=RS_94._col2(Inner),Output:["_col1","_col2","_col4","_col6","_col8","_col9"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] PartitionCols:_col0 - Select Operator [SEL_351] (rows=80000000 width=297) + Select Operator [SEL_326] (rows=80000000 width=297) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_350] (rows=80000000 width=764) + Group By Operator [GBY_325] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_79] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_78] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_285] (rows=187573258 width=764) - Conds:RS_74._col1=RS_318._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] + Group By Operator [GBY_17] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_282] (rows=187573258 width=764) + Conds:RS_294._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_317] (rows=80000000 width=656) + Select Operator [SEL_293] (rows=80000000 width=656) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_316] (rows=80000000 width=656) + Filter Operator [FIL_292] (rows=80000000 width=656) predicate:(c_customer_sk is not null and c_customer_id is not null) - TableScan [TS_68] (rows=80000000 width=656) + TableScan [TS_0] (rows=80000000 width=656) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_74] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_284] (rows=187573258 width=115) - Conds:RS_349._col0=RS_295._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] + Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=115) + Conds:RS_324._col0=RS_304._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] PartitionCols:_col0 - Select Operator [SEL_292] (rows=652 width=4) + Select Operator [SEL_301] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_289] (rows=652 width=8) + Filter Operator [FIL_298] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_65] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col0 - Select Operator [SEL_348] (rows=525327388 width=119) + Select Operator [SEL_323] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_347] (rows=525327388 width=221) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_62] (rows=575995635 width=221) + Filter Operator [FIL_322] (rows=525327388 width=221) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=221) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_321] + Group By Operator [GBY_320] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_307] - Group By Operator [GBY_303] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_316] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=652 width=4) + Select Operator [SEL_305] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_88] + Please refer to the previous Select Operator [SEL_301] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_94] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_287] (rows=20485011 width=440) - Conds:RS_85._col2=RS_344._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + Merge Join Operator [MERGEJOIN_290] (rows=20485011 width=440) + Conds:RS_89._col2=RS_355._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_355] PartitionCols:_col0 - Select Operator [SEL_343] (rows=17130654 width=216) + Select Operator [SEL_354] (rows=17130654 width=216) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=17130654 width=212) + Filter Operator [FIL_353] (rows=17130654 width=212) predicate:(_col7 > 0) - Select Operator [SEL_341] (rows=51391963 width=212) + Select Operator [SEL_352] (rows=51391963 width=212) Output:["_col0","_col7"] - Group By Operator [GBY_340] (rows=51391963 width=764) + Group By Operator [GBY_351] (rows=51391963 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_58] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_82] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_57] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=764) - Conds:RS_53._col1=RS_321._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Group By Operator [GBY_81] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_288] (rows=51391963 width=764) + Conds:RS_297._col0=RS_78._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_53] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_78] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_282] (rows=51391963 width=115) - Conds:RS_339._col0=RS_301._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_301] + Merge Join Operator [MERGEJOIN_287] (rows=51391963 width=115) + Conds:RS_350._col0=RS_310._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_310] PartitionCols:_col0 - Select Operator [SEL_294] (rows=652 width=4) + Select Operator [SEL_303] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_291] (rows=652 width=8) + Filter Operator [FIL_300] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + Please refer to the previous TableScan [TS_6] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_350] PartitionCols:_col0 - Select Operator [SEL_338] (rows=143930993 width=119) + Select Operator [SEL_349] (rows=143930993 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_337] (rows=143930993 width=231) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_41] (rows=144002668 width=231) + Filter Operator [FIL_348] (rows=143930993 width=231) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_67] (rows=144002668 width=231) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_336] - Group By Operator [GBY_335] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_347] + Group By Operator [GBY_346] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_310] - Group By Operator [GBY_306] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_319] + Group By Operator [GBY_315] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_302] (rows=652 width=4) + Select Operator [SEL_311] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_294] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_85] + Please refer to the previous Select Operator [SEL_303] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_89] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_286] (rows=31888273 width=324) - Conds:RS_324._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] + Merge Join Operator [MERGEJOIN_289] (rows=31888273 width=324) + Conds:RS_335._col0=RS_345._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_345] PartitionCols:_col0 - Select Operator [SEL_333] (rows=26666666 width=212) + Select Operator [SEL_344] (rows=26666666 width=212) Output:["_col0","_col1"] - Filter Operator [FIL_332] (rows=26666666 width=212) + Filter Operator [FIL_343] (rows=26666666 width=212) predicate:(_col7 > 0) - Select Operator [SEL_331] (rows=80000000 width=212) + Select Operator [SEL_342] (rows=80000000 width=212) Output:["_col0","_col7"] - Group By Operator [GBY_330] (rows=80000000 width=764) + Group By Operator [GBY_341] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_60] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_36] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=764) - Conds:RS_32._col1=RS_320._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Group By Operator [GBY_59] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_286] (rows=187573258 width=764) + Conds:RS_296._col0=RS_56._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_296] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=115) - Conds:RS_329._col0=RS_299._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_299] + Merge Join Operator [MERGEJOIN_285] (rows=187573258 width=115) + Conds:RS_340._col0=RS_308._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_308] PartitionCols:_col0 - Select Operator [SEL_293] (rows=652 width=4) + Select Operator [SEL_302] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_290] (rows=652 width=8) + Filter Operator [FIL_299] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + Please refer to the previous TableScan [TS_6] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_340] PartitionCols:_col0 - Select Operator [SEL_328] (rows=525327388 width=119) + Select Operator [SEL_339] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_327] (rows=525327388 width=221) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=221) + Filter Operator [FIL_338] (rows=525327388 width=221) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=575995635 width=221) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_326] - Group By Operator [GBY_325] (rows=1 width=12) + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_337] + Group By Operator [GBY_336] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] - Group By Operator [GBY_305] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_318] + Group By Operator [GBY_314] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_300] (rows=652 width=4) + Select Operator [SEL_309] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_293] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + Please refer to the previous Select Operator [SEL_302] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_335] PartitionCols:_col0 - Select Operator [SEL_323] (rows=51391963 width=212) + Select Operator [SEL_334] (rows=51391963 width=212) Output:["_col0","_col1"] - Group By Operator [GBY_322] (rows=51391963 width=764) + Group By Operator [GBY_333] (rows=51391963 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_39] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_16] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=764) - Conds:RS_12._col1=RS_319._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + Group By Operator [GBY_38] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_284] (rows=51391963 width=764) + Conds:RS_295._col0=RS_35._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_35] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) - Conds:RS_315._col0=RS_297._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] + Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=115) + Conds:RS_332._col0=RS_306._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_292] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Please refer to the previous Select Operator [SEL_301] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] PartitionCols:_col0 - Select Operator [SEL_314] (rows=143930993 width=119) + Select Operator [SEL_331] (rows=143930993 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_313] (rows=143930993 width=231) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=231) + Filter Operator [FIL_330] (rows=143930993 width=231) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=144002668 width=231) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_329] + Group By Operator [GBY_328] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_308] - Group By Operator [GBY_304] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_317] + Group By Operator [GBY_313] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=652 width=4) + Select Operator [SEL_307] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] + Please refer to the previous Select Operator [SEL_301] diff --git a/ql/src/test/results/clientpositive/perf/tez/query12.q.out b/ql/src/test/results/clientpositive/perf/tez/query12.q.out index e7f6691ffd..d53aaf347a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query12.q.out @@ -73,87 +73,87 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_83] - Limit [LIM_82] (rows=100 width=802) + Reducer 5 vectorized + File Output Operator [FS_84] + Limit [LIM_83] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_81] (rows=138600 width=801) + Select Operator [SEL_82] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_80] - Select Operator [SEL_79] (rows=138600 width=801) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + Select Operator [SEL_80] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_78] (rows=138600 width=689) + Top N Key Operator [TNK_79] (rows=138600 width=689) keys:_col0, _col1, _col2, _col3, ((_col5 * 100) / sum_window_0),top n:100 - PTF Operator [PTF_77] (rows=138600 width=689) + PTF Operator [PTF_78] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_76] (rows=138600 width=689) + Select Operator [SEL_77] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_75] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] PartitionCols:_col1 - Group By Operator [GBY_74] (rows=138600 width=689) + Group By Operator [GBY_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=138600 width=689) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_59] (rows=4798568 width=689) - Conds:RS_12._col1=RS_73._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + Group By Operator [GBY_17] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)"],keys:_col5, _col4, _col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_60] (rows=4798568 width=689) + Conds:RS_63._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_72] (rows=138600 width=581) + Select Operator [SEL_62] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_71] (rows=138600 width=581) + Filter Operator [FIL_61] (rows=138600 width=581) predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=581) + TableScan [TS_0] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_58] (rows=15995224 width=115) - Conds:RS_70._col0=RS_62._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_62] + Merge Join Operator [MERGEJOIN_59] (rows=15995224 width=115) + Conds:RS_74._col0=RS_66._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col0 - Select Operator [SEL_61] (rows=8116 width=4) + Select Operator [SEL_65] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_60] (rows=8116 width=98) + Filter Operator [FIL_64] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_70] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_69] (rows=143966864 width=119) + Select Operator [SEL_73] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_68] (rows=143966864 width=119) + Filter Operator [FIL_72] (rows=143966864 width=119) predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + TableScan [TS_3] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_67] - Group By Operator [GBY_66] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_65] - Group By Operator [GBY_64] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_63] (rows=8116 width=4) + Select Operator [SEL_67] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_61] + Please refer to the previous Select Operator [SEL_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out index 47fb75783a..56051398aa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -115,118 +115,118 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 7 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_147] - Select Operator [SEL_146] (rows=1 width=344) + Reducer 6 vectorized + File Output Operator [FS_148] + Select Operator [SEL_147] (rows=1 width=344) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_145] (rows=1 width=256) + Group By Operator [GBY_146] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_37] - Group By Operator [GBY_36] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)"] - Merge Join Operator [MERGEJOIN_121] (rows=368553 width=0) - Conds:RS_32._col4=RS_144._col0(Inner),Output:["_col5","_col6","_col7"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_38] + Group By Operator [GBY_37] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col9)","count(_col9)","sum(_col10)","count(_col10)","sum(_col11)","count(_col11)"] + Merge Join Operator [MERGEJOIN_122] (rows=368553 width=0) + Conds:RS_33._col8=RS_145._col0(Inner),Output:["_col9","_col10","_col11"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_145] PartitionCols:_col0 - Select Operator [SEL_143] (rows=1704 width=4) + Select Operator [SEL_144] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_142] (rows=1704 width=4) + Filter Operator [FIL_143] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_15] (rows=1704 width=4) + TableScan [TS_19] (rows=1704 width=4) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col4 - Filter Operator [FIL_31] (rows=368553 width=44) - predicate:((_col20 and _col21 and _col11 and _col27) or (_col22 and _col23 and _col12 and _col28) or (_col24 and _col25 and _col13 and _col28)) - Merge Join Operator [MERGEJOIN_120] (rows=1965626 width=44) - Conds:RS_28._col2=RS_141._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col20","_col21","_col22","_col23","_col24","_col25","_col27","_col28"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col8 + Filter Operator [FIL_32] (rows=368553 width=44) + predicate:((_col20 and _col21 and _col15 and _col27) or (_col22 and _col23 and _col16 and _col28) or (_col24 and _col25 and _col17 and _col28)) + Merge Join Operator [MERGEJOIN_121] (rows=1965626 width=44) + Conds:RS_29._col6=RS_142._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col20","_col21","_col22","_col23","_col24","_col25","_col27","_col28"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_142] PartitionCols:_col0 - Select Operator [SEL_140] (rows=1309 width=12) + Select Operator [SEL_141] (rows=1309 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_139] (rows=1309 width=8) + Filter Operator [FIL_140] (rows=1309 width=8) predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) - TableScan [TS_12] (rows=7200 width=8) + TableScan [TS_16] (rows=7200 width=8) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_119] (rows=10811694 width=36) - Conds:RS_25._col1=RS_138._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col20","_col21","_col22","_col23","_col24","_col25"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_120] (rows=10811694 width=36) + Conds:RS_26._col5=RS_139._col0(Inner),Output:["_col6","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col20","_col21","_col22","_col23","_col24","_col25"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_137] (rows=265971 width=28) + Select Operator [SEL_138] (rows=265971 width=28) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_136] (rows=265971 width=183) + Filter Operator [FIL_137] (rows=265971 width=183) predicate:((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) - TableScan [TS_9] (rows=1861800 width=183) + TableScan [TS_13] (rows=1861800 width=183) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col1 - Filter Operator [FIL_24] (rows=10811694 width=36) - predicate:((_col16 and _col8) or (_col17 and _col9) or (_col18 and _col10)) - Merge Join Operator [MERGEJOIN_118] (rows=14415593 width=36) - Conds:RS_21._col3=RS_135._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col16","_col17","_col18"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col5 + Filter Operator [FIL_25] (rows=10811694 width=36) + predicate:((_col1 and _col12) or (_col2 and _col13) or (_col3 and _col14)) + Merge Join Operator [MERGEJOIN_119] (rows=14415593 width=36) + Conds:RS_125._col0=RS_23._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_134] (rows=3529412 width=16) + Select Operator [SEL_124] (rows=3529412 width=16) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_133] (rows=3529412 width=187) + Filter Operator [FIL_123] (rows=3529412 width=187) predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=187) + TableScan [TS_0] (rows=40000000 width=187) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_23] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_117] (rows=163376714 width=237) - Conds:RS_132._col0=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] + Merge Join Operator [MERGEJOIN_118] (rows=163376714 width=237) + Conds:RS_136._col0=RS_128._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] PartitionCols:_col0 - Select Operator [SEL_123] (rows=652 width=4) + Select Operator [SEL_127] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_122] (rows=652 width=8) + Filter Operator [FIL_126] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] PartitionCols:_col0 - Select Operator [SEL_131] (rows=457561292 width=260) + Select Operator [SEL_135] (rows=457561292 width=260) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_130] (rows=457561292 width=450) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250)) and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=450) + Filter Operator [FIL_134] (rows=457561292 width=450) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or (ss_net_profit >= 150) or (ss_net_profit <= 300) or (ss_net_profit >= 50) or (ss_net_profit <= 250)) and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=450) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=652 width=4) + Select Operator [SEL_129] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] + Please refer to the previous Select Operator [SEL_127] diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out index 89783031ee..4a5cb7014c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1178][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1185][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1192][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1181][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 20' is a cross product +Warning: Shuffle Join MERGEJOIN[1207][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 27' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -222,52 +222,52 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) +Map 38 <- Reducer 41 (BROADCAST_EDGE) Map 46 <- Reducer 49 (BROADCAST_EDGE) Map 64 <- Reducer 51 (BROADCAST_EDGE) Map 65 <- Reducer 53 (BROADCAST_EDGE) Map 66 <- Reducer 57 (BROADCAST_EDGE) Map 67 <- Reducer 72 (BROADCAST_EDGE) Map 73 <- Reducer 78 (BROADCAST_EDGE) -Map 79 <- Reducer 17 (BROADCAST_EDGE) -Map 80 <- Reducer 23 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 79 (SIMPLE_EDGE) -Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 60 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 17 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 10 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE) -Reducer 19 <- Map 24 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE), Reducer 63 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 23 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 26 <- Map 24 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Union 28 (SIMPLE_EDGE) -Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 26 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 32 <- Union 31 (SIMPLE_EDGE) -Reducer 33 <- Map 24 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) -Reducer 34 <- Reducer 26 (SIMPLE_EDGE), Union 35 (CONTAINS) -Reducer 36 <- Union 35 (SIMPLE_EDGE) -Reducer 37 <- Map 24 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) -Reducer 38 <- Map 24 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE) -Reducer 39 <- Reducer 38 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 4 <- Reducer 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 40 <- Reducer 38 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 41 <- Reducer 38 (SIMPLE_EDGE), Union 35 (CONTAINS) -Reducer 42 <- Map 24 (SIMPLE_EDGE), Reducer 52 (SIMPLE_EDGE) -Reducer 43 <- Reducer 42 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 44 <- Reducer 42 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 45 <- Reducer 42 (SIMPLE_EDGE), Union 35 (CONTAINS) +Map 79 <- Reducer 43 (BROADCAST_EDGE) +Map 80 <- Reducer 45 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 10 (SIMPLE_EDGE), Union 15 (CONTAINS) +Reducer 16 <- Union 15 (SIMPLE_EDGE) +Reducer 17 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (CUSTOM_SIMPLE_EDGE), Reducer 60 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 21 <- Reducer 10 (SIMPLE_EDGE), Union 22 (CONTAINS) +Reducer 23 <- Union 22 (SIMPLE_EDGE) +Reducer 24 <- Map 1 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 63 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 28 <- Map 1 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 30 <- Reducer 28 (SIMPLE_EDGE), Union 15 (CONTAINS) +Reducer 31 <- Reducer 28 (SIMPLE_EDGE), Union 22 (CONTAINS) +Reducer 32 <- Map 1 (SIMPLE_EDGE), Reducer 52 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 34 <- Reducer 32 (SIMPLE_EDGE), Union 15 (CONTAINS) +Reducer 35 <- Reducer 32 (SIMPLE_EDGE), Union 22 (CONTAINS) +Reducer 36 <- Map 1 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 37 <- Map 1 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) +Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 41 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 42 <- Map 40 (SIMPLE_EDGE), Map 79 (SIMPLE_EDGE) +Reducer 43 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 40 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE) +Reducer 45 <- Map 40 (CUSTOM_SIMPLE_EDGE) Reducer 47 <- Map 46 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) Reducer 49 <- Map 48 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 50 <- Map 48 (SIMPLE_EDGE), Map 64 (SIMPLE_EDGE) Reducer 51 <- Map 48 (CUSTOM_SIMPLE_EDGE) Reducer 52 <- Map 48 (SIMPLE_EDGE), Map 65 (SIMPLE_EDGE) @@ -276,798 +276,798 @@ Reducer 54 <- Map 48 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 55 (CONTAINS) Reducer 56 <- Union 55 (CUSTOM_SIMPLE_EDGE) Reducer 57 <- Map 48 (CUSTOM_SIMPLE_EDGE) Reducer 58 <- Map 48 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 59 (CONTAINS) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 56 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 60 <- Union 59 (CUSTOM_SIMPLE_EDGE) Reducer 61 <- Map 48 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 62 (CONTAINS) Reducer 63 <- Union 62 (CUSTOM_SIMPLE_EDGE) Reducer 68 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 55 (CONTAINS) Reducer 69 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 59 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) Reducer 70 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 62 (CONTAINS) Reducer 72 <- Map 71 (CUSTOM_SIMPLE_EDGE) Reducer 74 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 55 (CONTAINS) Reducer 75 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 59 (CONTAINS) Reducer 76 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 62 (CONTAINS) Reducer 78 <- Map 77 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_1351] - Limit [LIM_1350] (rows=100 width=223) + Reducer 8 vectorized + File Output Operator [FS_1354] + Limit [LIM_1353] (rows=100 width=223) Number of rows:100 - Select Operator [SEL_1349] (rows=304320 width=222) + Select Operator [SEL_1352] (rows=304320 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1348] - Select Operator [SEL_1347] (rows=304320 width=222) + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1351] + Select Operator [SEL_1350] (rows=304320 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_1346] (rows=304320 width=230) + Group By Operator [GBY_1349] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_1191] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 20 [CONTAINS] + Reduce Output Operator [RS_1202] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1190] (rows=304320 width=230) + Group By Operator [GBY_1201] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1189] (rows=121728 width=220) + Top N Key Operator [TNK_1200] (rows=121728 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1187] (rows=40576 width=222) + Select Operator [SEL_1198] (rows=40576 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1186] (rows=40576 width=243) + Filter Operator [FIL_1197] (rows=40576 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1185] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1196] (rows=121728 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1363] - Filter Operator [FIL_1362] (rows=121728 width=131) + <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1397] + Filter Operator [FIL_1396] (rows=121728 width=131) predicate:_col3 is not null - Group By Operator [GBY_1361] (rows=121728 width=131) + Group By Operator [GBY_1395] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_244] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_246] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_243] (rows=486912 width=131) + Group By Operator [GBY_245] (rows=486912 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_241] (rows=7790806 width=106) + Select Operator [SEL_243] (rows=7790806 width=106) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1173] (rows=7790806 width=106) - Conds:RS_238._col1=RS_239._col0(Left Semi),Output:["_col2","_col3","_col6","_col7","_col8"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_238] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1149] (rows=7790806 width=110) - Conds:RS_233._col1=RS_1331._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1331] - PartitionCols:_col0 - Select Operator [SEL_1322] (rows=462000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1313] (rows=462000 width=15) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=15) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_233] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1148] (rows=7790806 width=98) - Conds:RS_1356._col0=RS_1293._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1293] - PartitionCols:_col0 - Select Operator [SEL_1290] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_1289] (rows=50 width=12) - predicate:((d_year = 2000) and (d_moy = 11) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 79 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1356] - PartitionCols:_col0 - Select Operator [SEL_1355] (rows=286549727 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1354] (rows=286549727 width=123) - predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_231_date_dim_d_date_sk_min) AND DynamicValue(RS_231_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_231_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_146] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1353] - Group By Operator [GBY_1352] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1301] - Group By Operator [GBY_1298] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1294] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1290] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_239] + Merge Join Operator [MERGEJOIN_1176] (rows=7790806 width=106) + Conds:RS_240._col5=RS_241._col0(Left Semi),Output:["_col1","_col2","_col3","_col6","_col7"] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_241] PartitionCols:_col0 - Group By Operator [GBY_237] (rows=362 width=4) + Group By Operator [GBY_239] (rows=362 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1156] (rows=724 width=4) - Conds:RS_1332._col1, _col2, _col3=RS_1360._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1332] + Merge Join Operator [MERGEJOIN_1159] (rows=724 width=4) + Conds:RS_1316._col1, _col2, _col3=RS_1389._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1316] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1323] (rows=458612 width=15) + Select Operator [SEL_1307] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1314] (rows=458612 width=15) + Filter Operator [FIL_1298] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1360] + TableScan [TS_0] (rows=462000 width=15) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1389] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1359] (rows=1 width=12) + Select Operator [SEL_1388] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1358] (rows=1 width=20) + Filter Operator [FIL_1387] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1357] (rows=121728 width=19) + Group By Operator [GBY_1386] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 31 [SIMPLE_EDGE] - <-Reducer 30 [CONTAINS] vectorized - Reduce Output Operator [RS_1414] + <-Union 15 [SIMPLE_EDGE] + <-Reducer 14 [CONTAINS] vectorized + Reduce Output Operator [RS_1385] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1413] (rows=121728 width=19) + Group By Operator [GBY_1384] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1412] (rows=121728 width=19) + Group By Operator [GBY_1383] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_175] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_180] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_28] (rows=121728 width=19) + Group By Operator [GBY_32] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1139] (rows=14628613 width=11) - Conds:RS_24._col1=RS_1328._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1328] + Merge Join Operator [MERGEJOIN_1142] (rows=14628613 width=11) + Conds:RS_28._col1=RS_1312._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1312] PartitionCols:_col0 - Select Operator [SEL_1319] (rows=458612 width=15) + Select Operator [SEL_1303] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1310] (rows=458612 width=15) + Filter Operator [FIL_1294] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] + Please refer to the previous TableScan [TS_0] <-Reducer 47 [SIMPLE_EDGE] - SHUFFLE [RS_24] + SHUFFLE [RS_28] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1138] (rows=14736682 width=4) - Conds:RS_1408._col0=RS_1386._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1141] (rows=14736682 width=4) + Conds:RS_1379._col0=RS_1357._col0(Inner),Output:["_col1"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1386] + SHUFFLE [RS_1357] PartitionCols:_col0 - Select Operator [SEL_1385] (rows=1957 width=4) + Select Operator [SEL_1356] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1384] (rows=1957 width=8) + Filter Operator [FIL_1355] (rows=1957 width=8) predicate:(d_year BETWEEN 1999 AND 2001 and d_date_sk is not null) - TableScan [TS_15] (rows=73049 width=8) + TableScan [TS_19] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1408] + SHUFFLE [RS_1379] PartitionCols:_col0 - Select Operator [SEL_1407] (rows=550076554 width=7) + Select Operator [SEL_1378] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1406] (rows=550076554 width=7) - predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) - TableScan [TS_12] (rows=575995635 width=7) + Filter Operator [FIL_1377] (rows=550076554 width=7) + predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_26_d1_d_date_sk_min) AND DynamicValue(RS_26_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_26_d1_d_date_sk_bloom_filter))) + TableScan [TS_16] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 49 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1405] - Group By Operator [GBY_1404] (rows=1 width=12) + BROADCAST [RS_1376] + Group By Operator [GBY_1375] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1400] - Group By Operator [GBY_1396] (rows=1 width=12) + SHUFFLE [RS_1371] + Group By Operator [GBY_1367] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1387] (rows=1957 width=4) + Select Operator [SEL_1358] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1385] - <-Reducer 40 [CONTAINS] vectorized - Reduce Output Operator [RS_1428] + Please refer to the previous Select Operator [SEL_1356] + <-Reducer 30 [CONTAINS] vectorized + Reduce Output Operator [RS_1431] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1427] (rows=121728 width=19) + Group By Operator [GBY_1430] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1426] (rows=121728 width=19) + Group By Operator [GBY_1429] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_195] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_200] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=121728 width=19) + Group By Operator [GBY_52] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1141] (rows=7620440 width=11) - Conds:RS_44._col1=RS_1329._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1329] + Merge Join Operator [MERGEJOIN_1144] (rows=7620440 width=11) + Conds:RS_48._col1=RS_1313._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1313] PartitionCols:_col0 - Select Operator [SEL_1320] (rows=458612 width=15) + Select Operator [SEL_1304] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1311] (rows=458612 width=15) + Filter Operator [FIL_1295] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] + Please refer to the previous TableScan [TS_0] <-Reducer 50 [SIMPLE_EDGE] - SHUFFLE [RS_44] + SHUFFLE [RS_48] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1140] (rows=7676736 width=4) - Conds:RS_1422._col0=RS_1388._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1143] (rows=7676736 width=4) + Conds:RS_1425._col0=RS_1359._col0(Inner),Output:["_col1"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1388] + SHUFFLE [RS_1359] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] <-Map 64 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1422] + SHUFFLE [RS_1425] PartitionCols:_col0 - Select Operator [SEL_1421] (rows=286549727 width=7) + Select Operator [SEL_1424] (rows=286549727 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1420] (rows=286549727 width=7) - predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_d2_d_date_sk_min) AND DynamicValue(RS_42_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d2_d_date_sk_bloom_filter))) - TableScan [TS_32] (rows=287989836 width=7) + Filter Operator [FIL_1423] (rows=286549727 width=7) + predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_46_d2_d_date_sk_min) AND DynamicValue(RS_46_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_46_d2_d_date_sk_bloom_filter))) + TableScan [TS_36] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] <-Reducer 51 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1419] - Group By Operator [GBY_1418] (rows=1 width=12) + BROADCAST [RS_1422] + Group By Operator [GBY_1421] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1401] - Group By Operator [GBY_1397] (rows=1 width=12) + SHUFFLE [RS_1372] + Group By Operator [GBY_1368] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1389] (rows=1957 width=4) + Select Operator [SEL_1360] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1385] - <-Reducer 44 [CONTAINS] vectorized - Reduce Output Operator [RS_1442] + Please refer to the previous Select Operator [SEL_1356] + <-Reducer 34 [CONTAINS] vectorized + Reduce Output Operator [RS_1445] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1441] (rows=121728 width=19) + Group By Operator [GBY_1444] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1440] (rows=121728 width=19) + Group By Operator [GBY_1443] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_216] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_221] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_69] (rows=121728 width=19) + Group By Operator [GBY_73] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1143] (rows=3828623 width=11) - Conds:RS_65._col1=RS_1330._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1330] + Merge Join Operator [MERGEJOIN_1146] (rows=3828623 width=11) + Conds:RS_69._col1=RS_1314._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1314] PartitionCols:_col0 - Select Operator [SEL_1321] (rows=458612 width=15) + Select Operator [SEL_1305] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1312] (rows=458612 width=15) + Filter Operator [FIL_1296] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] + Please refer to the previous TableScan [TS_0] <-Reducer 52 [SIMPLE_EDGE] - SHUFFLE [RS_65] + SHUFFLE [RS_69] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1142] (rows=3856907 width=4) - Conds:RS_1436._col0=RS_1390._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1145] (rows=3856907 width=4) + Conds:RS_1439._col0=RS_1361._col0(Inner),Output:["_col1"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1390] + SHUFFLE [RS_1361] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1436] + SHUFFLE [RS_1439] PartitionCols:_col0 - Select Operator [SEL_1435] (rows=143966864 width=7) + Select Operator [SEL_1438] (rows=143966864 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1434] (rows=143966864 width=7) - predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_63_d3_d_date_sk_min) AND DynamicValue(RS_63_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_63_d3_d_date_sk_bloom_filter))) - TableScan [TS_53] (rows=144002668 width=7) + Filter Operator [FIL_1437] (rows=143966864 width=7) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_67_d3_d_date_sk_min) AND DynamicValue(RS_67_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_67_d3_d_date_sk_bloom_filter))) + TableScan [TS_57] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] <-Reducer 53 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1433] - Group By Operator [GBY_1432] (rows=1 width=12) + BROADCAST [RS_1436] + Group By Operator [GBY_1435] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1402] - Group By Operator [GBY_1398] (rows=1 width=12) + SHUFFLE [RS_1373] + Group By Operator [GBY_1369] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1391] (rows=1957 width=4) + Select Operator [SEL_1362] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_240] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1152] (rows=7790806 width=110) + Conds:RS_1315._col0=RS_236._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1315] + PartitionCols:_col0 + Select Operator [SEL_1306] (rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1297] (rows=462000 width=15) + predicate:i_item_sk is not null + Please refer to the previous TableScan [TS_0] + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_236] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1151] (rows=7790806 width=98) + Conds:RS_1394._col0=RS_1323._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 40 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1323] + PartitionCols:_col0 + Select Operator [SEL_1320] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_1319] (rows=50 width=12) + predicate:((d_year = 2000) and (d_moy = 11) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 79 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1394] + PartitionCols:_col0 + Select Operator [SEL_1393] (rows=286549727 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1392] (rows=286549727 width=123) + predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_157_date_dim_d_date_sk_min) AND DynamicValue(RS_157_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_157_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_150] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1391] + Group By Operator [GBY_1390] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1331] + Group By Operator [GBY_1328] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1324] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1320] <-Reducer 60 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1367] - Select Operator [SEL_1366] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1401] + Select Operator [SEL_1400] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1365] (rows=1 width=120) + Filter Operator [FIL_1399] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1364] (rows=1 width=120) + Group By Operator [GBY_1398] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 59 [CUSTOM_SIMPLE_EDGE] <-Reducer 58 [CONTAINS] - Reduce Output Operator [RS_1246] - Group By Operator [GBY_1245] (rows=1 width=120) + Reduce Output Operator [RS_1249] + Group By Operator [GBY_1248] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1244] (rows=26270325 width=44) + Select Operator [SEL_1247] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1242] (rows=14736682 width=0) + Select Operator [SEL_1245] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1241] (rows=14736682 width=0) - Conds:RS_1451._col0=RS_1394._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1244] (rows=14736682 width=0) + Conds:RS_1454._col0=RS_1365._col0(Inner),Output:["_col1","_col2"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1394] + SHUFFLE [RS_1365] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1451] + SHUFFLE [RS_1454] PartitionCols:_col0 - Select Operator [SEL_1449] (rows=550076554 width=114) + Select Operator [SEL_1452] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1448] (rows=550076554 width=114) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_109_date_dim_d_date_sk_min) AND DynamicValue(RS_109_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_109_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_102] (rows=575995635 width=114) + Filter Operator [FIL_1451] (rows=550076554 width=114) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_110_date_dim_d_date_sk_min) AND DynamicValue(RS_110_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_110_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_103] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] <-Reducer 57 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1447] - Group By Operator [GBY_1446] (rows=1 width=12) + BROADCAST [RS_1450] + Group By Operator [GBY_1449] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1403] - Group By Operator [GBY_1399] (rows=1 width=12) + SHUFFLE [RS_1374] + Group By Operator [GBY_1370] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1393] (rows=1957 width=4) + Select Operator [SEL_1364] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] <-Reducer 69 [CONTAINS] - Reduce Output Operator [RS_1264] - Group By Operator [GBY_1263] (rows=1 width=120) + Reduce Output Operator [RS_1267] + Group By Operator [GBY_1266] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1262] (rows=26270325 width=44) + Select Operator [SEL_1265] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1260] (rows=7676736 width=94) + Select Operator [SEL_1263] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1259] (rows=7676736 width=94) - Conds:RS_1466._col0=RS_1457._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1262] (rows=7676736 width=94) + Conds:RS_1469._col0=RS_1460._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1457] + PARTITION_ONLY_SHUFFLE [RS_1460] PartitionCols:_col0 - Select Operator [SEL_1454] (rows=1957 width=4) + Select Operator [SEL_1457] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1453] (rows=1957 width=8) + Filter Operator [FIL_1456] (rows=1957 width=8) predicate:(d_year BETWEEN 1998 AND 2000 and d_date_sk is not null) - TableScan [TS_115] (rows=73049 width=8) + TableScan [TS_116] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1466] + SHUFFLE [RS_1469] PartitionCols:_col0 - Select Operator [SEL_1464] (rows=286549727 width=119) + Select Operator [SEL_1467] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1463] (rows=286549727 width=119) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_119_date_dim_d_date_sk_min) AND DynamicValue(RS_119_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_119_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_112] (rows=287989836 width=119) + Filter Operator [FIL_1466] (rows=286549727 width=119) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_120_date_dim_d_date_sk_min) AND DynamicValue(RS_120_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_120_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_113] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] <-Reducer 72 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1462] - Group By Operator [GBY_1461] (rows=1 width=12) + BROADCAST [RS_1465] + Group By Operator [GBY_1464] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 71 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1460] - Group By Operator [GBY_1459] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1463] + Group By Operator [GBY_1462] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1456] (rows=1957 width=4) + Select Operator [SEL_1459] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1454] + Please refer to the previous Select Operator [SEL_1457] <-Reducer 75 [CONTAINS] - Reduce Output Operator [RS_1282] - Group By Operator [GBY_1281] (rows=1 width=120) + Reduce Output Operator [RS_1285] + Group By Operator [GBY_1284] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1280] (rows=26270325 width=44) + Select Operator [SEL_1283] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1278] (rows=3856907 width=114) + Select Operator [SEL_1281] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1277] (rows=3856907 width=114) - Conds:RS_1481._col0=RS_1472._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1280] (rows=3856907 width=114) + Conds:RS_1484._col0=RS_1475._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1472] + PARTITION_ONLY_SHUFFLE [RS_1475] PartitionCols:_col0 - Select Operator [SEL_1469] (rows=1957 width=4) + Select Operator [SEL_1472] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1468] (rows=1957 width=8) + Filter Operator [FIL_1471] (rows=1957 width=8) predicate:(d_year BETWEEN 1998 AND 2000 and d_date_sk is not null) - TableScan [TS_126] (rows=73049 width=8) + TableScan [TS_127] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1481] + SHUFFLE [RS_1484] PartitionCols:_col0 - Select Operator [SEL_1479] (rows=143966864 width=119) + Select Operator [SEL_1482] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1478] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_130_date_dim_d_date_sk_min) AND DynamicValue(RS_130_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_130_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_123] (rows=144002668 width=119) + Filter Operator [FIL_1481] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_131_date_dim_d_date_sk_min) AND DynamicValue(RS_131_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_131_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_124] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] <-Reducer 78 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1477] - Group By Operator [GBY_1476] (rows=1 width=12) + BROADCAST [RS_1480] + Group By Operator [GBY_1479] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 77 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1475] - Group By Operator [GBY_1474] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1478] + Group By Operator [GBY_1477] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1471] (rows=1957 width=4) + Select Operator [SEL_1474] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1469] - <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_1198] + Please refer to the previous Select Operator [SEL_1472] + <-Reducer 27 [CONTAINS] + Reduce Output Operator [RS_1213] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1197] (rows=304320 width=230) + Group By Operator [GBY_1212] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1196] (rows=121728 width=220) + Top N Key Operator [TNK_1211] (rows=121728 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1194] (rows=40576 width=218) + Select Operator [SEL_1209] (rows=40576 width=218) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1193] (rows=40576 width=243) + Filter Operator [FIL_1208] (rows=40576 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1192] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1207] (rows=121728 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1379] - Filter Operator [FIL_1378] (rows=121728 width=131) + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1416] + Filter Operator [FIL_1415] (rows=121728 width=131) predicate:_col3 is not null - Group By Operator [GBY_1377] (rows=121728 width=131) + Group By Operator [GBY_1414] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_391] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_394] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_390] (rows=243456 width=131) + Group By Operator [GBY_393] (rows=243456 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_388] (rows=3942084 width=126) + Select Operator [SEL_391] (rows=3942084 width=126) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1174] (rows=3942084 width=126) - Conds:RS_385._col1=RS_386._col0(Left Semi),Output:["_col2","_col3","_col6","_col7","_col8"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_385] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1161] (rows=3942084 width=130) - Conds:RS_380._col1=RS_1333._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1333] - PartitionCols:_col0 - Select Operator [SEL_1324] (rows=462000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1315] (rows=462000 width=15) - predicate:i_item_sk is not null - Please refer to the previous TableScan [TS_6] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_380] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1160] (rows=3942084 width=118) - Conds:RS_1372._col0=RS_1295._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1295] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1290] - <-Map 80 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1372] - PartitionCols:_col0 - Select Operator [SEL_1371] (rows=143966864 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1370] (rows=143966864 width=123) - predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_378_date_dim_d_date_sk_min) AND DynamicValue(RS_378_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_378_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_293] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1369] - Group By Operator [GBY_1368] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1302] - Group By Operator [GBY_1299] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1296] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1290] - <-Reducer 37 [SIMPLE_EDGE] - SHUFFLE [RS_386] + Merge Join Operator [MERGEJOIN_1177] (rows=3942084 width=126) + Conds:RS_388._col5=RS_389._col0(Left Semi),Output:["_col1","_col2","_col3","_col6","_col7"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_389] PartitionCols:_col0 - Group By Operator [GBY_384] (rows=362 width=4) + Group By Operator [GBY_387] (rows=362 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1168] (rows=724 width=4) - Conds:RS_1334._col1, _col2, _col3=RS_1376._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1334] + Merge Join Operator [MERGEJOIN_1171] (rows=724 width=4) + Conds:RS_1318._col1, _col2, _col3=RS_1408._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1318] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1325] (rows=458612 width=15) + Select Operator [SEL_1309] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1316] (rows=458612 width=15) + Filter Operator [FIL_1300] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1376] + Please refer to the previous TableScan [TS_0] + <-Reducer 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1408] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1375] (rows=1 width=12) + Select Operator [SEL_1407] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1374] (rows=1 width=20) + Filter Operator [FIL_1406] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1373] (rows=121728 width=19) + Group By Operator [GBY_1405] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 35 [SIMPLE_EDGE] - <-Reducer 34 [CONTAINS] vectorized - Reduce Output Operator [RS_1417] + <-Union 22 [SIMPLE_EDGE] + <-Reducer 21 [CONTAINS] vectorized + Reduce Output Operator [RS_1404] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1416] (rows=121728 width=19) + Group By Operator [GBY_1403] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1415] (rows=121728 width=19) + Group By Operator [GBY_1402] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_322] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_328] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_28] - <-Reducer 41 [CONTAINS] vectorized - Reduce Output Operator [RS_1431] + Please refer to the previous Group By Operator [GBY_32] + <-Reducer 31 [CONTAINS] vectorized + Reduce Output Operator [RS_1434] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1430] (rows=121728 width=19) + Group By Operator [GBY_1433] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1429] (rows=121728 width=19) + Group By Operator [GBY_1432] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_342] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_348] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_48] - <-Reducer 45 [CONTAINS] vectorized - Reduce Output Operator [RS_1445] + Please refer to the previous Group By Operator [GBY_52] + <-Reducer 35 [CONTAINS] vectorized + Reduce Output Operator [RS_1448] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1444] (rows=121728 width=19) + Group By Operator [GBY_1447] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1443] (rows=121728 width=19) + Group By Operator [GBY_1446] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_363] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_369] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_69] + Please refer to the previous Group By Operator [GBY_73] + <-Reducer 37 [SIMPLE_EDGE] + SHUFFLE [RS_388] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1164] (rows=3942084 width=130) + Conds:RS_1317._col0=RS_384._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1317] + PartitionCols:_col0 + Select Operator [SEL_1308] (rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1299] (rows=462000 width=15) + predicate:i_item_sk is not null + Please refer to the previous TableScan [TS_0] + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_384] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1163] (rows=3942084 width=118) + Conds:RS_1413._col0=RS_1325._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 40 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1325] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1320] + <-Map 80 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1413] + PartitionCols:_col0 + Select Operator [SEL_1412] (rows=143966864 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1411] (rows=143966864 width=123) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_305_date_dim_d_date_sk_min) AND DynamicValue(RS_305_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_305_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_298] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1410] + Group By Operator [GBY_1409] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1332] + Group By Operator [GBY_1329] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1326] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1320] <-Reducer 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1383] - Select Operator [SEL_1382] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1420] + Select Operator [SEL_1419] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1381] (rows=1 width=120) + Filter Operator [FIL_1418] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1380] (rows=1 width=120) + Group By Operator [GBY_1417] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 62 [CUSTOM_SIMPLE_EDGE] <-Reducer 61 [CONTAINS] - Reduce Output Operator [RS_1252] - Group By Operator [GBY_1251] (rows=1 width=120) + Reduce Output Operator [RS_1255] + Group By Operator [GBY_1254] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1250] (rows=26270325 width=44) + Select Operator [SEL_1253] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1248] (rows=14736682 width=0) + Select Operator [SEL_1251] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1247] (rows=14736682 width=0) - Conds:RS_1452._col0=RS_1395._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1250] (rows=14736682 width=0) + Conds:RS_1455._col0=RS_1366._col0(Inner),Output:["_col1","_col2"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1395] + SHUFFLE [RS_1366] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1452] + SHUFFLE [RS_1455] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1449] + Please refer to the previous Select Operator [SEL_1452] <-Reducer 70 [CONTAINS] - Reduce Output Operator [RS_1270] - Group By Operator [GBY_1269] (rows=1 width=120) + Reduce Output Operator [RS_1273] + Group By Operator [GBY_1272] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1268] (rows=26270325 width=44) + Select Operator [SEL_1271] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1266] (rows=7676736 width=94) + Select Operator [SEL_1269] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1265] (rows=7676736 width=94) - Conds:RS_1467._col0=RS_1458._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1268] (rows=7676736 width=94) + Conds:RS_1470._col0=RS_1461._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1458] + PARTITION_ONLY_SHUFFLE [RS_1461] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1454] + Please refer to the previous Select Operator [SEL_1457] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1467] + SHUFFLE [RS_1470] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1467] <-Reducer 76 [CONTAINS] - Reduce Output Operator [RS_1288] - Group By Operator [GBY_1287] (rows=1 width=120) + Reduce Output Operator [RS_1291] + Group By Operator [GBY_1290] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1286] (rows=26270325 width=44) + Select Operator [SEL_1289] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1284] (rows=3856907 width=114) + Select Operator [SEL_1287] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1283] (rows=3856907 width=114) - Conds:RS_1482._col0=RS_1473._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1286] (rows=3856907 width=114) + Conds:RS_1485._col0=RS_1476._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1473] + PARTITION_ONLY_SHUFFLE [RS_1476] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1469] + Please refer to the previous Select Operator [SEL_1472] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1482] + SHUFFLE [RS_1485] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] - <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_1184] + Please refer to the previous Select Operator [SEL_1482] + <-Reducer 5 [CONTAINS] + Reduce Output Operator [RS_1187] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1183] (rows=304320 width=230) + Group By Operator [GBY_1186] (rows=304320 width=230) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1182] (rows=121728 width=220) + Top N Key Operator [TNK_1185] (rows=121728 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1180] (rows=40576 width=220) + Select Operator [SEL_1183] (rows=40576 width=220) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1179] (rows=40576 width=243) + Filter Operator [FIL_1182] (rows=40576 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1178] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1181] (rows=121728 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1341] - Filter Operator [FIL_1340] (rows=121728 width=131) + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1344] + Filter Operator [FIL_1343] (rows=121728 width=131) predicate:_col3 is not null - Group By Operator [GBY_1339] (rows=121728 width=131) + Group By Operator [GBY_1342] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_98] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_99] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_97] (rows=121728 width=131) + Group By Operator [GBY_98] (rows=121728 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_95] (rows=15062131 width=11) + Select Operator [SEL_96] (rows=15062131 width=11) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1172] (rows=15062131 width=11) - Conds:RS_92._col1=RS_93._col0(Left Semi),Output:["_col2","_col3","_col6","_col7","_col8"] - <-Reducer 25 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_1175] (rows=15062131 width=11) + Conds:RS_93._col5=RS_94._col0(Left Semi),Output:["_col1","_col2","_col3","_col6","_col7"] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_93] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1140] (rows=15062131 width=15) + Conds:RS_1310._col0=RS_89._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1310] + PartitionCols:_col0 + Select Operator [SEL_1301] (rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1292] (rows=462000 width=15) + predicate:i_item_sk is not null + Please refer to the previous TableScan [TS_0] + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1139] (rows=15062131 width=4) + Conds:RS_1337._col0=RS_1321._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 40 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1321] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1320] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1337] + PartitionCols:_col0 + Select Operator [SEL_1336] (rows=550076554 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1335] (rows=550076554 width=118) + predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] + <-Reducer 41 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1334] + Group By Operator [GBY_1333] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1330] + Group By Operator [GBY_1327] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1322] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1320] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_94] PartitionCols:_col0 - Group By Operator [GBY_91] (rows=362 width=4) + Group By Operator [GBY_92] (rows=362 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1144] (rows=724 width=4) - Conds:RS_1327._col1, _col2, _col3=RS_1338._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1327] + Merge Join Operator [MERGEJOIN_1147] (rows=724 width=4) + Conds:RS_1311._col1, _col2, _col3=RS_1341._col0, _col1, _col2(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1311] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1318] (rows=458612 width=15) + Select Operator [SEL_1302] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1309] (rows=458612 width=15) + Filter Operator [FIL_1293] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1338] + Please refer to the previous TableScan [TS_0] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1341] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1337] (rows=1 width=12) + Select Operator [SEL_1340] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1336] (rows=1 width=20) + Filter Operator [FIL_1339] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1335] (rows=121728 width=19) + Group By Operator [GBY_1338] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 28 [SIMPLE_EDGE] - <-Reducer 27 [CONTAINS] vectorized - Reduce Output Operator [RS_1411] + <-Union 12 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_1382] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1410] (rows=121728 width=19) + Group By Operator [GBY_1381] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1409] (rows=121728 width=19) + Group By Operator [GBY_1380] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_29] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_28] - <-Reducer 39 [CONTAINS] vectorized - Reduce Output Operator [RS_1425] + Please refer to the previous Group By Operator [GBY_32] + <-Reducer 29 [CONTAINS] vectorized + Reduce Output Operator [RS_1428] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1424] (rows=121728 width=19) + Group By Operator [GBY_1427] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1423] (rows=121728 width=19) + Group By Operator [GBY_1426] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_48] - <-Reducer 43 [CONTAINS] vectorized - Reduce Output Operator [RS_1439] + Please refer to the previous Group By Operator [GBY_52] + <-Reducer 33 [CONTAINS] vectorized + Reduce Output Operator [RS_1442] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1438] (rows=121728 width=19) + Group By Operator [GBY_1441] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1437] (rows=121728 width=19) + Group By Operator [GBY_1440] (rows=121728 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_69] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_92] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1137] (rows=15062131 width=15) - Conds:RS_87._col1=RS_1326._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1326] - PartitionCols:_col0 - Select Operator [SEL_1317] (rows=462000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1308] (rows=462000 width=15) - predicate:i_item_sk is not null - Please refer to the previous TableScan [TS_6] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_87] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1136] (rows=15062131 width=4) - Conds:RS_1307._col0=RS_1291._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1291] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1290] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1307] - PartitionCols:_col0 - Select Operator [SEL_1306] (rows=550076554 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1305] (rows=550076554 width=118) - predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_85_date_dim_d_date_sk_min) AND DynamicValue(RS_85_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_85_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1304] - Group By Operator [GBY_1303] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1300] - Group By Operator [GBY_1297] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1292] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1290] + Please refer to the previous Group By Operator [GBY_73] <-Reducer 56 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1345] - Select Operator [SEL_1344] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1348] + Select Operator [SEL_1347] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1343] (rows=1 width=120) + Filter Operator [FIL_1346] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1342] (rows=1 width=120) + Group By Operator [GBY_1345] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 55 [CUSTOM_SIMPLE_EDGE] <-Reducer 54 [CONTAINS] - Reduce Output Operator [RS_1240] - Group By Operator [GBY_1239] (rows=1 width=120) + Reduce Output Operator [RS_1243] + Group By Operator [GBY_1242] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1238] (rows=26270325 width=44) + Select Operator [SEL_1241] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1236] (rows=14736682 width=0) + Select Operator [SEL_1239] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1235] (rows=14736682 width=0) - Conds:RS_1450._col0=RS_1392._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1238] (rows=14736682 width=0) + Conds:RS_1453._col0=RS_1363._col0(Inner),Output:["_col1","_col2"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1392] + SHUFFLE [RS_1363] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1385] + Please refer to the previous Select Operator [SEL_1356] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1450] + SHUFFLE [RS_1453] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1449] + Please refer to the previous Select Operator [SEL_1452] <-Reducer 68 [CONTAINS] - Reduce Output Operator [RS_1258] - Group By Operator [GBY_1257] (rows=1 width=120) + Reduce Output Operator [RS_1261] + Group By Operator [GBY_1260] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1256] (rows=26270325 width=44) + Select Operator [SEL_1259] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1254] (rows=7676736 width=94) + Select Operator [SEL_1257] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1253] (rows=7676736 width=94) - Conds:RS_1465._col0=RS_1455._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1256] (rows=7676736 width=94) + Conds:RS_1468._col0=RS_1458._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1455] + PARTITION_ONLY_SHUFFLE [RS_1458] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1454] + Please refer to the previous Select Operator [SEL_1457] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1465] + SHUFFLE [RS_1468] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1467] <-Reducer 74 [CONTAINS] - Reduce Output Operator [RS_1276] - Group By Operator [GBY_1275] (rows=1 width=120) + Reduce Output Operator [RS_1279] + Group By Operator [GBY_1278] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1274] (rows=26270325 width=44) + Select Operator [SEL_1277] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1272] (rows=3856907 width=114) + Select Operator [SEL_1275] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1271] (rows=3856907 width=114) - Conds:RS_1480._col0=RS_1470._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1274] (rows=3856907 width=114) + Conds:RS_1483._col0=RS_1473._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1470] + PARTITION_ONLY_SHUFFLE [RS_1473] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1469] + Please refer to the previous Select Operator [SEL_1472] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1480] + SHUFFLE [RS_1483] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] + Please refer to the previous Select Operator [SEL_1482] diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out index fc72b5d2b3..ba8cccf952 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out @@ -73,149 +73,151 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 14 <- Reducer 9 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Map 10 <- Reducer 9 (BROADCAST_EDGE) +Map 14 <- Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_160] - Group By Operator [GBY_159] (rows=1 width=232) + Reducer 7 vectorized + File Output Operator [FS_161] + Group By Operator [GBY_160] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] - Group By Operator [GBY_157] (rows=1 width=232) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_159] + Group By Operator [GBY_158] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_156] (rows=5150256 width=228) + Group By Operator [GBY_157] (rows=5150256 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_69] (rows=5150256 width=228) + Group By Operator [GBY_70] (rows=5150256 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_42] (rows=5150256 width=214) + Select Operator [SEL_43] (rows=5150256 width=214) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_41] (rows=5150256 width=214) + Filter Operator [FIL_42] (rows=5150256 width=214) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_126] (rows=10300512 width=214) - Conds:RS_38._col4=RS_155._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] + Merge Join Operator [MERGEJOIN_127] (rows=10300512 width=214) + Conds:RS_39._col4=RS_156._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_155] + SHUFFLE [RS_156] PartitionCols:_col1 - Select Operator [SEL_154] (rows=18238808 width=8) + Select Operator [SEL_155] (rows=18238808 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_153] (rows=18238808 width=4) + Group By Operator [GBY_154] (rows=18238808 width=4) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_151] (rows=28798881 width=4) + Group By Operator [GBY_152] (rows=28798881 width=4) Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_150] (rows=28798881 width=4) + Filter Operator [FIL_151] (rows=28798881 width=4) predicate:cr_order_number is not null - TableScan [TS_25] (rows=28798881 width=4) + TableScan [TS_26] (rows=28798881 width=4) default@catalog_returns,cr1,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_order_number"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_38] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_39] PartitionCols:_col4 - Select Operator [SEL_37] (rows=5150256 width=200) + Select Operator [SEL_38] (rows=5150256 width=200) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_125] (rows=5150256 width=202) - Conds:RS_34._col4=RS_149._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} - <-Reducer 4 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_34] + Merge Join Operator [MERGEJOIN_126] (rows=5150256 width=202) + Conds:RS_35._col4=RS_150._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_35] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_124] (rows=5150256 width=200) - Conds:RS_18._col2=RS_143._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - PartitionCols:_col0 - Select Operator [SEL_142] (rows=10 width=102) - Output:["_col0"] - Filter Operator [FIL_141] (rows=10 width=102) - predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) - TableScan [TS_9] (rows=60 width=102) - default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_123] (rows=30901534 width=230) - Conds:RS_15._col1=RS_129._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] - PartitionCols:_col0 - Select Operator [SEL_128] (rows=784314 width=90) - Output:["_col0"] - Filter Operator [FIL_127] (rows=784314 width=90) - predicate:((ca_state = 'NY') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=90) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_122] (rows=31519516 width=234) - Conds:RS_137._col0=RS_140._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] - PartitionCols:_col0 - Select Operator [SEL_136] (rows=283695062 width=243) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_135] (rows=283695062 width=243) - predicate:(cs_ship_date_sk is not null and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_order_number is not null and cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=243) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=784314 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_128] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] - PartitionCols:_col0 - Select Operator [SEL_139] (rows=8116 width=98) - Output:["_col0"] - Filter Operator [FIL_138] (rows=8116 width=98) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + Select Operator [SEL_22] (rows=5150256 width=200) + Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_125] (rows=5150256 width=200) + Conds:RS_19._col4=RS_144._col0(Inner),Output:["_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=10 width=102) + Output:["_col0"] + Filter Operator [FIL_142] (rows=10 width=102) + predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) + TableScan [TS_13] (rows=60 width=102) + default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_124] (rows=30901534 width=230) + Conds:RS_130._col0=RS_17._col1(Inner),Output:["_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + PartitionCols:_col0 + Select Operator [SEL_129] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_128] (rows=784314 width=90) + predicate:((ca_state = 'NY') and ca_address_sk is not null) + TableScan [TS_0] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_123] (rows=31519516 width=234) + Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=283695062 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_136] (rows=283695062 width=243) + predicate:(cs_ship_date_sk is not null and cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_order_number is not null and cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) + TableScan [TS_3] (rows=287989836 width=243) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_131] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_129] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] + PartitionCols:_col0 + Select Operator [SEL_140] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_139] (rows=8116 width=98) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Group By Operator [GBY_148] (rows=286548719 width=7) + Group By Operator [GBY_149] (rows=286548719 width=7) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_147] (rows=286548719 width=7) + Select Operator [SEL_148] (rows=286548719 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=286548719 width=7) - predicate:(cs_warehouse_sk is not null and cs_order_number is not null and cs_order_number BETWEEN DynamicValue(RS_34_cs1_cs_order_number_min) AND DynamicValue(RS_34_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_34_cs1_cs_order_number_bloom_filter))) - TableScan [TS_22] (rows=287989836 width=7) + Filter Operator [FIL_147] (rows=286548719 width=7) + predicate:(cs_warehouse_sk is not null and cs_order_number is not null and cs_order_number BETWEEN DynamicValue(RS_35_cs1_cs_order_number_min) AND DynamicValue(RS_35_cs1_cs_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_35_cs1_cs_order_number_bloom_filter))) + TableScan [TS_23] (rows=287989836 width=7) default@catalog_sales,cs2,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_warehouse_sk","cs_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=5150256 width=8) + Select Operator [SEL_111] (rows=5150256 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_124] + Please refer to the previous Select Operator [SEL_22] diff --git a/ql/src/test/results/clientpositive/perf/tez/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/query18.q.out index 515f120a27..ac4ff60537 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query18.q.out @@ -81,14 +81,14 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 14 (BROADCAST_EDGE) -Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Map 11 <- Reducer 15 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Map 16 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) @@ -97,124 +97,122 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=100 width=1165) + File Output Operator [FS_177] + Limit [LIM_176] (rows=100 width=1165) Number of rows:100 - Select Operator [SEL_174] (rows=79918180 width=1165) + Select Operator [SEL_175] (rows=79918180 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] - Select Operator [SEL_172] (rows=79918180 width=1165) + SHUFFLE [RS_174] + Select Operator [SEL_173] (rows=79918180 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Top N Key Operator [TNK_171] (rows=79918180 width=1229) + Top N Key Operator [TNK_172] (rows=79918180 width=1229) keys:_col2, _col1, _col0, _col3,top n:100 - Group By Operator [GBY_170] (rows=79918180 width=1229) + Group By Operator [GBY_171] (rows=79918180 width=1229) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_43] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_41] (rows=79918180 width=1229) + Group By Operator [GBY_42] (rows=79918180 width=1229) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col17)","count(_col17)","sum(_col18)","count(_col18)","sum(_col19)","count(_col19)","sum(_col3)","count(_col3)","sum(_col22)","count(_col22)"],keys:_col5, _col6, _col7, _col10, 0L - Merge Join Operator [MERGEJOIN_143] (rows=15983636 width=1116) - Conds:RS_37._col0=RS_38._col3(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col15","_col16","_col17","_col18","_col19","_col22"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_38] + Merge Join Operator [MERGEJOIN_144] (rows=15983636 width=1116) + Conds:RS_38._col0=RS_39._col3(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col15","_col16","_col17","_col18","_col19","_col22"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_39] PartitionCols:_col3 - Select Operator [SEL_30] (rows=15983636 width=735) - Output:["_col1","_col3","_col6","_col7","_col8","_col9","_col10","_col13"] - Merge Join Operator [MERGEJOIN_142] (rows=15983636 width=735) - Conds:RS_27._col3=RS_169._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] - PartitionCols:_col0 - Select Operator [SEL_168] (rows=462000 width=104) - Output:["_col0","_col1"] - Filter Operator [FIL_167] (rows=462000 width=104) - predicate:i_item_sk is not null - TableScan [TS_18] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_141] (rows=15983636 width=639) - Conds:RS_24._col2=RS_166._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] - PartitionCols:_col0 - Select Operator [SEL_165] (rows=103434 width=116) - Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=103434 width=187) - predicate:((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=187) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_140] (rows=100578970 width=565) - Conds:RS_163._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] - PartitionCols:_col0 - Select Operator [SEL_154] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_153] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] - PartitionCols:_col0 - Select Operator [SEL_162] (rows=283692098 width=573) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_161] (rows=283692098 width=466) - predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=287989836 width=466) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_160] - Group By Operator [GBY_159] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_156] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_154] + Merge Join Operator [MERGEJOIN_143] (rows=15983636 width=735) + Conds:RS_156._col0=RS_29._col3(Inner),Output:["_col1","_col3","_col6","_col7","_col8","_col9","_col10","_col13"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col0 + Select Operator [SEL_155] (rows=462000 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=462000 width=104) + predicate:i_item_sk is not null + TableScan [TS_9] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_142] (rows=15983636 width=639) + Conds:RS_24._col2=RS_170._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + PartitionCols:_col0 + Select Operator [SEL_169] (rows=103434 width=116) + Output:["_col0","_col1"] + Filter Operator [FIL_168] (rows=103434 width=187) + predicate:((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) + TableScan [TS_18] (rows=1861800 width=187) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_141] (rows=100578970 width=565) + Conds:RS_167._col0=RS_159._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_159] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_157] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_15] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_167] + PartitionCols:_col0 + Select Operator [SEL_166] (rows=283692098 width=573) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_165] (rows=283692098 width=466) + predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_12] (rows=287989836 width=466) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_164] + Group By Operator [GBY_163] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_162] + Group By Operator [GBY_161] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_160] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_158] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_37] + SHUFFLE [RS_38] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_139] (rows=4959744 width=368) - Conds:RS_34._col1=RS_152._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_140] (rows=4959744 width=368) + Conds:RS_35._col1=RS_153._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_151] (rows=1861800 width=4) + Select Operator [SEL_152] (rows=1861800 width=4) Output:["_col0"] - Filter Operator [FIL_150] (rows=1861800 width=4) + Filter Operator [FIL_151] (rows=1861800 width=4) predicate:cd_demo_sk is not null TableScan [TS_6] (rows=1861800 width=4) default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] + SHUFFLE [RS_35] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_138] (rows=4890586 width=371) - Conds:RS_146._col2=RS_149._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_139] (rows=4890586 width=371) + Conds:RS_147._col2=RS_150._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + SHUFFLE [RS_147] PartitionCols:_col2 - Select Operator [SEL_145] (rows=35631408 width=119) + Select Operator [SEL_146] (rows=35631408 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_144] (rows=35631408 width=19) + Filter Operator [FIL_145] (rows=35631408 width=19) predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_cdemo_sk is not null and c_customer_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=19) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_148] (rows=5490196 width=285) + Select Operator [SEL_149] (rows=5490196 width=285) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_147] (rows=5490196 width=285) + Filter Operator [FIL_148] (rows=5490196 width=285) predicate:((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=285) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query1b.q.out b/ql/src/test/results/clientpositive/perf/tez/query1b.q.out index fbdeb33434..7e46565627 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query1b.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query1b.q.out @@ -65,71 +65,56 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 10 <- Reducer 7 (BROADCAST_EDGE) - Map 3 <- Map 11 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (BROADCAST_EDGE), Map 3 (SIMPLE_EDGE) - Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Reducer 8 (BROADCAST_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 3 (SIMPLE_EDGE) + Map 1 <- Reducer 8 (BROADCAST_EDGE) + Map 6 <- Map 10 (BROADCAST_EDGE), Map 11 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 4 (BROADCAST_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: store - filterExpr: ((s_state = 'NM') and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer + filterExpr: (c_customer_sk is not null and c_customer_sk BETWEEN DynamicValue(RS_46_store_returns_sr_customer_sk_min) AND DynamicValue(RS_46_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_46_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((s_state = 'NM') and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (c_customer_sk is not null and c_customer_sk BETWEEN DynamicValue(RS_46_store_returns_sr_customer_sk_min) AND DynamicValue(RS_46_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_46_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: s_store_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + expressions: c_customer_sk (type: int), c_customer_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=35) - minReductionHashAggr: 0.9714286 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized Map 10 Map Operator Tree: TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_customer_sk BETWEEN DynamicValue(RS_44_store_returns_sr_customer_sk_min) AND DynamicValue(RS_44_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_44_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (c_customer_sk is not null and c_customer_sk BETWEEN DynamicValue(RS_44_store_returns_sr_customer_sk_min) AND DynamicValue(RS_44_store_returns_sr_customer_sk_max) and in_bloom_filter(c_customer_sk, DynamicValue(RS_44_store_returns_sr_customer_sk_bloom_filter))) (type: boolean) - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 652 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: c_customer_sk (type: int), c_customer_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 80000000 Data size: 8320000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map 11 Map Operator Tree: @@ -151,14 +136,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized - Map 3 + Map 4 + Map Operator Tree: + TableScan + alias: store + filterExpr: ((s_state = 'NM') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((s_state = 'NM') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 35 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 35 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=35) + minReductionHashAggr: 0.9714286 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + Execution mode: vectorized + Map 6 Map Operator Tree: TableScan alias: store_returns - filterExpr: (((sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null) or (sr_store_sk is not null and sr_returned_date_sk is not null)) and sr_store_sk BETWEEN DynamicValue(RS_41_store_s_store_sk_min) AND DynamicValue(RS_41_store_s_store_sk_max) and in_bloom_filter(sr_store_sk, DynamicValue(RS_41_store_s_store_sk_bloom_filter))) (type: boolean) + filterExpr: (((sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null) or (sr_store_sk is not null and sr_returned_date_sk is not null)) and sr_store_sk BETWEEN DynamicValue(RS_21_store_s_store_sk_min) AND DynamicValue(RS_21_store_s_store_sk_max) and in_bloom_filter(sr_store_sk, DynamicValue(RS_21_store_s_store_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 57591150 Data size: 6891360020 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null and sr_store_sk BETWEEN DynamicValue(RS_41_store_s_store_sk_min) AND DynamicValue(RS_41_store_s_store_sk_max) and in_bloom_filter(sr_store_sk, DynamicValue(RS_41_store_s_store_sk_bloom_filter))) (type: boolean) + predicate: (sr_customer_sk is not null and sr_store_sk is not null and sr_returned_date_sk is not null and sr_store_sk BETWEEN DynamicValue(RS_21_store_s_store_sk_min) AND DynamicValue(RS_21_store_s_store_sk_max) and in_bloom_filter(sr_store_sk, DynamicValue(RS_21_store_s_store_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 51757026 Data size: 6193248408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sr_returned_date_sk (type: int), sr_customer_sk (type: int), sr_store_sk (type: int), sr_fee (type: decimal(7,2)) @@ -172,7 +192,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 9 + 1 Map 10 Statistics: Num rows: 16855704 Data size: 1805298496 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3) @@ -189,7 +209,7 @@ STAGE PLANS: Statistics: Num rows: 16855704 Data size: 2008197920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Filter Operator - predicate: (sr_store_sk is not null and sr_returned_date_sk is not null and sr_store_sk BETWEEN DynamicValue(RS_41_store_s_store_sk_min) AND DynamicValue(RS_41_store_s_store_sk_max) and in_bloom_filter(sr_store_sk, DynamicValue(RS_41_store_s_store_sk_bloom_filter))) (type: boolean) + predicate: (sr_store_sk is not null and sr_returned_date_sk is not null and sr_store_sk BETWEEN DynamicValue(RS_21_store_s_store_sk_min) AND DynamicValue(RS_21_store_s_store_sk_max) and in_bloom_filter(sr_store_sk, DynamicValue(RS_21_store_s_store_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 53634860 Data size: 6417950124 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sr_returned_date_sk (type: int), sr_customer_sk (type: int), sr_store_sk (type: int), sr_fee (type: decimal(7,2)) @@ -220,27 +240,63 @@ STAGE PLANS: Statistics: Num rows: 17467258 Data size: 2081058800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col4, _col5 + Statistics: Num rows: 2369298 Data size: 505631124 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col5, _col6 + input vertices: + 1 Reducer 9 + Statistics: Num rows: 2448274 Data size: 793240776 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 652 Data size: 5216 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 652 Data size: 2608 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (_col5 > _col6) (type: boolean) + Statistics: Num rows: 816091 Data size: 264413484 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col1 (type: string) + null sort order: z + Statistics: Num rows: 816091 Data size: 264413484 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 Execution mode: vectorized - Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -253,7 +309,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 4 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -277,7 +333,7 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col1, _col2, _col3 input vertices: - 0 Map 1 + 0 Map 4 Statistics: Num rows: 2369298 Data size: 272032680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) @@ -301,63 +357,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 5 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5 - Statistics: Num rows: 2369298 Data size: 505631124 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col3, _col5, _col6 - input vertices: - 1 Reducer 8 - Statistics: Num rows: 2448274 Data size: 793240776 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col3 > _col6) (type: boolean) - Statistics: Num rows: 816091 Data size: 264413484 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col5 (type: string) - null sort order: z - Statistics: Num rows: 816091 Data size: 264413484 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col5 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - Reducer 6 - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 816091 Data size: 81609100 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 10000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -370,7 +370,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) - Reducer 8 + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/query20.q.out b/ql/src/test/results/clientpositive/perf/tez/query20.q.out index d6084f9178..9d945d9cac 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query20.q.out @@ -65,87 +65,87 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_83] - Limit [LIM_82] (rows=100 width=802) + Reducer 5 vectorized + File Output Operator [FS_84] + Limit [LIM_83] (rows=100 width=802) Number of rows:100 - Select Operator [SEL_81] (rows=138600 width=801) + Select Operator [SEL_82] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_80] - Select Operator [SEL_79] (rows=138600 width=801) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_81] + Select Operator [SEL_80] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_78] (rows=138600 width=689) + Top N Key Operator [TNK_79] (rows=138600 width=689) keys:_col0, _col1, _col2, _col3, ((_col5 * 100) / sum_window_0),top n:100 - PTF Operator [PTF_77] (rows=138600 width=689) + PTF Operator [PTF_78] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_76] (rows=138600 width=689) + Select Operator [SEL_77] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_75] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] PartitionCols:_col1 - Group By Operator [GBY_74] (rows=138600 width=689) + Group By Operator [GBY_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=138600 width=689) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_59] (rows=9551005 width=673) - Conds:RS_12._col1=RS_73._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + Group By Operator [GBY_17] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)"],keys:_col5, _col4, _col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_60] (rows=9551005 width=673) + Conds:RS_63._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_72] (rows=138600 width=581) + Select Operator [SEL_62] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_71] (rows=138600 width=581) + Filter Operator [FIL_61] (rows=138600 width=581) predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=581) + TableScan [TS_0] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_58] (rows=31836679 width=110) - Conds:RS_70._col0=RS_62._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_62] + Merge Join Operator [MERGEJOIN_59] (rows=31836679 width=110) + Conds:RS_74._col0=RS_66._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] PartitionCols:_col0 - Select Operator [SEL_61] (rows=8116 width=4) + Select Operator [SEL_65] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_60] (rows=8116 width=98) + Filter Operator [FIL_64] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_70] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col0 - Select Operator [SEL_69] (rows=286549727 width=119) + Select Operator [SEL_73] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_68] (rows=286549727 width=119) + Filter Operator [FIL_72] (rows=286549727 width=119) predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=119) + TableScan [TS_3] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_67] - Group By Operator [GBY_66] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_65] - Group By Operator [GBY_64] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_63] (rows=8116 width=4) + Select Operator [SEL_67] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_61] + Please refer to the previous Select Operator [SEL_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/query21.q.out b/ql/src/test/results/clientpositive/perf/tez/query21.q.out index 08e4828dc2..edc37c482d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query21.q.out @@ -69,83 +69,83 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Map 9 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_97] - Limit [LIM_96] (rows=100 width=216) + Reducer 4 vectorized + File Output Operator [FS_98] + Limit [LIM_97] (rows=100 width=216) Number of rows:100 - Select Operator [SEL_95] (rows=115991 width=216) + Select Operator [SEL_96] (rows=115991 width=216) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_94] - Top N Key Operator [TNK_93] (rows=115991 width=216) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_95] + Top N Key Operator [TNK_94] (rows=115991 width=216) keys:_col0, _col1,top n:100 - Filter Operator [FIL_92] (rows=115991 width=216) + Filter Operator [FIL_93] (rows=115991 width=216) predicate:(CASE WHEN ((_col2 > 0L)) THEN ((0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2)))) ELSE (false) END and CASE WHEN ((_col2 > 0L)) THEN (((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D)) ELSE (false) END) - Group By Operator [GBY_91] (rows=463966 width=216) + Group By Operator [GBY_92] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_23] (rows=463966 width=216) + Group By Operator [GBY_24] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Select Operator [SEL_21] (rows=463966 width=208) + Select Operator [SEL_22] (rows=463966 width=208) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_78] (rows=463966 width=208) - Conds:RS_18._col2=RS_90._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_90] + Merge Join Operator [MERGEJOIN_79] (rows=463966 width=208) + Conds:RS_82._col0=RS_20._col2(Inner),Output:["_col1","_col5","_col7","_col8","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_82] PartitionCols:_col0 - Select Operator [SEL_89] (rows=27 width=104) + Select Operator [SEL_81] (rows=27 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_88] (rows=27 width=104) + Filter Operator [FIL_80] (rows=27 width=104) predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=104) + TableScan [TS_0] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_77] (rows=463966 width=112) - Conds:RS_15._col1=RS_87._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_87] + Merge Join Operator [MERGEJOIN_78] (rows=463966 width=112) + Conds:RS_15._col1=RS_91._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] PartitionCols:_col0 - Select Operator [SEL_86] (rows=51333 width=104) + Select Operator [SEL_90] (rows=51333 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_85] (rows=51333 width=215) + Filter Operator [FIL_89] (rows=51333 width=215) predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=215) + TableScan [TS_9] (rows=462000 width=215) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_76] (rows=4175715 width=18) - Conds:RS_81._col0=RS_84._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_81] + Merge Join Operator [MERGEJOIN_77] (rows=4175715 width=18) + Conds:RS_85._col0=RS_88._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_85] PartitionCols:_col0 - Select Operator [SEL_80] (rows=37584000 width=15) + Select Operator [SEL_84] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_79] (rows=37584000 width=15) + Filter Operator [FIL_83] (rows=37584000 width=15) predicate:(inv_warehouse_sk is not null and inv_item_sk is not null and inv_date_sk is not null) - TableScan [TS_0] (rows=37584000 width=15) + TableScan [TS_3] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_84] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_88] PartitionCols:_col0 - Select Operator [SEL_83] (rows=8116 width=12) + Select Operator [SEL_87] (rows=8116 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_82] (rows=8116 width=98) + Filter Operator [FIL_86] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/query22.q.out index 45a090267b..69ca472ea5 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query22.q.out @@ -53,81 +53,81 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Map 9 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_95] - Limit [LIM_94] (rows=100 width=397) + Reducer 4 vectorized + File Output Operator [FS_96] + Limit [LIM_95] (rows=100 width=397) Number of rows:100 - Select Operator [SEL_93] (rows=32730675 width=397) + Select Operator [SEL_94] (rows=32730675 width=397) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_92] - Select Operator [SEL_91] (rows=32730675 width=397) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_93] + Select Operator [SEL_92] (rows=32730675 width=397) Output:["_col0","_col1","_col2","_col3","_col4"] - Top N Key Operator [TNK_90] (rows=32730675 width=413) + Top N Key Operator [TNK_91] (rows=32730675 width=413) keys:(UDFToDouble(_col5) / _col6), _col3, _col0, _col1, _col2,top n:100 - Group By Operator [GBY_89] (rows=32730675 width=413) + Group By Operator [GBY_90] (rows=32730675 width=413) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_22] (rows=32730675 width=413) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)"],keys:_col7, _col8, _col9, _col10, 0L - Merge Join Operator [MERGEJOIN_76] (rows=6546135 width=391) - Conds:RS_18._col1=RS_88._col0(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_88] + Group By Operator [GBY_23] (rows=32730675 width=413) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col8)","count(_col8)"],keys:_col1, _col2, _col3, _col4, 0L + Merge Join Operator [MERGEJOIN_77] (rows=6546135 width=391) + Conds:RS_80._col0=RS_20._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_80] PartitionCols:_col0 - Select Operator [SEL_87] (rows=462000 width=393) + Select Operator [SEL_79] (rows=462000 width=393) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_86] (rows=462000 width=393) + Filter Operator [FIL_78] (rows=462000 width=393) predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=393) + TableScan [TS_0] (rows=462000 width=393) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_75] (rows=6546135 width=6) - Conds:RS_15._col2=RS_85._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_85] + Merge Join Operator [MERGEJOIN_76] (rows=6546135 width=6) + Conds:RS_15._col2=RS_89._col0(Inner),Output:["_col1","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_89] PartitionCols:_col0 - Select Operator [SEL_84] (rows=27 width=4) + Select Operator [SEL_88] (rows=27 width=4) Output:["_col0"] - Filter Operator [FIL_83] (rows=27 width=4) + Filter Operator [FIL_87] (rows=27 width=4) predicate:w_warehouse_sk is not null - TableScan [TS_6] (rows=27 width=4) + TableScan [TS_9] (rows=27 width=4) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_74] (rows=6546135 width=10) - Conds:RS_79._col0=RS_82._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_79] + Merge Join Operator [MERGEJOIN_75] (rows=6546135 width=10) + Conds:RS_83._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_83] PartitionCols:_col0 - Select Operator [SEL_78] (rows=37584000 width=15) + Select Operator [SEL_82] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_77] (rows=37584000 width=15) + Filter Operator [FIL_81] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) - TableScan [TS_0] (rows=37584000 width=15) + TableScan [TS_3] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_82] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_86] PartitionCols:_col0 - Select Operator [SEL_81] (rows=317 width=4) + Select Operator [SEL_85] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_80] (rows=317 width=8) + Filter Operator [FIL_84] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 3ac6bf1d9f..a30961b5ab 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[454][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 25' is a cross product -Warning: Shuffle Join MERGEJOIN[456][tables = [$hdt$_3, $hdt$_4]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[458][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 25' is a cross product +Warning: Shuffle Join MERGEJOIN[460][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 30' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -119,367 +119,367 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 15 <- Reducer 20 (BROADCAST_EDGE) -Map 22 <- Reducer 7 (BROADCAST_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE) +Map 16 <- Reducer 19 (BROADCAST_EDGE) +Map 22 <- Reducer 9 (BROADCAST_EDGE) Map 33 <- Reducer 36 (BROADCAST_EDGE) -Map 37 <- Reducer 14 (BROADCAST_EDGE) -Map 38 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 37 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 13 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Map 37 <- Reducer 21 (BROADCAST_EDGE) +Map 38 <- Reducer 12 (BROADCAST_EDGE) +Reducer 10 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 12 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 21 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) Reducer 24 <- Reducer 23 (SIMPLE_EDGE) Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE) Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 28 <- Reducer 27 (SIMPLE_EDGE) Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) Reducer 31 <- Map 26 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) Reducer 32 <- Reducer 31 (SIMPLE_EDGE) Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 25 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Union 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_538] - Group By Operator [GBY_537] (rows=1 width=112) + Reducer 8 vectorized + File Output Operator [FS_542] + Group By Operator [GBY_541] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Union 5 [CUSTOM_SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_467] - Group By Operator [GBY_466] (rows=1 width=112) + <-Union 7 [CUSTOM_SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] + Reduce Output Operator [RS_471] + Group By Operator [GBY_470] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_464] (rows=3941102 width=112) + Select Operator [SEL_468] (rows=3941102 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_463] (rows=3941102 width=114) - Conds:RS_178._col2=RS_179._col0(Left Semi),Output:["_col3","_col4"] - <-Reducer 11 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_178] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_453] (rows=3941102 width=118) - Conds:RS_173._col1=RS_503._col0(Inner),Output:["_col2","_col3","_col4"] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_503] + Merge Join Operator [MERGEJOIN_467] (rows=3941102 width=114) + Conds:RS_182._col3=RS_183._col0(Left Semi),Output:["_col4","_col5"] + <-Reducer 10 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_182] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_457] (rows=3941102 width=118) + Conds:RS_492._col0=RS_178._col1(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_492] PartitionCols:_col0 - Group By Operator [GBY_501] (rows=2235 width=4) + Group By Operator [GBY_490] (rows=2235 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_500] (rows=6548799 width=220) + Select Operator [SEL_489] (rows=6548799 width=220) Output:["_col1"] - Filter Operator [FIL_499] (rows=6548799 width=220) + Filter Operator [FIL_488] (rows=6548799 width=220) predicate:(_col3 > 4L) - Select Operator [SEL_498] (rows=19646398 width=220) + Select Operator [SEL_487] (rows=19646398 width=220) Output:["_col1","_col3"] - Group By Operator [GBY_497] (rows=19646398 width=220) + Group By Operator [GBY_486] (rows=19646398 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=19646398 width=220) + Group By Operator [GBY_16] (rows=19646398 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col3, _col5 - Merge Join Operator [MERGEJOIN_442] (rows=19646398 width=212) - Conds:RS_18._col1=RS_496._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_496] + Merge Join Operator [MERGEJOIN_445] (rows=19646398 width=212) + Conds:RS_12._col1=RS_485._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_485] PartitionCols:_col0 - Select Operator [SEL_495] (rows=462000 width=118) + Select Operator [SEL_484] (rows=462000 width=118) Output:["_col0","_col1"] - Filter Operator [FIL_494] (rows=462000 width=188) + Filter Operator [FIL_483] (rows=462000 width=188) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=188) + TableScan [TS_6] (rows=462000 width=188) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_441] (rows=19646398 width=98) - Conds:RS_493._col0=RS_485._col0(Inner),Output:["_col1","_col3"] - <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_485] + Merge Join Operator [MERGEJOIN_444] (rows=19646398 width=98) + Conds:RS_482._col0=RS_474._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_474] PartitionCols:_col0 - Select Operator [SEL_484] (rows=2609 width=98) + Select Operator [SEL_473] (rows=2609 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_483] (rows=2609 width=102) + Filter Operator [FIL_472] (rows=2609 width=102) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=102) + TableScan [TS_3] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_year"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_493] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_482] PartitionCols:_col0 - Select Operator [SEL_492] (rows=550076554 width=7) + Select Operator [SEL_481] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_491] (rows=550076554 width=7) - predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=7) + Filter Operator [FIL_480] (rows=550076554 width=7) + predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_490] - Group By Operator [GBY_489] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_479] + Group By Operator [GBY_478] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_488] - Group By Operator [GBY_487] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_477] + Group By Operator [GBY_476] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_486] (rows=2609 width=4) + Select Operator [SEL_475] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_484] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_173] + Please refer to the previous Select Operator [SEL_473] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_178] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_446] (rows=3941102 width=122) - Conds:RS_543._col0=RS_472._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_472] + Merge Join Operator [MERGEJOIN_452] (rows=3941102 width=122) + Conds:RS_547._col0=RS_497._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_497] PartitionCols:_col0 - Select Operator [SEL_469] (rows=50 width=4) + Select Operator [SEL_494] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_468] (rows=50 width=12) + Filter Operator [FIL_493] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 1) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_29] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_543] + SHUFFLE [RS_547] PartitionCols:_col0 - Select Operator [SEL_542] (rows=143930993 width=127) + Select Operator [SEL_546] (rows=143930993 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_541] (rows=143930993 width=127) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_171_date_dim_d_date_sk_min) AND DynamicValue(RS_171_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_171_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_91] (rows=144002668 width=127) + Filter Operator [FIL_545] (rows=143930993 width=127) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_126_date_dim_d_date_sk_min) AND DynamicValue(RS_126_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_126_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_119] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_540] - Group By Operator [GBY_539] (rows=1 width=12) + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_544] + Group By Operator [GBY_543] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_477] - Group By Operator [GBY_475] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_502] + Group By Operator [GBY_500] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_473] (rows=50 width=4) + Select Operator [SEL_498] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_469] + Please refer to the previous Select Operator [SEL_494] <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_179] + SHUFFLE [RS_183] PartitionCols:_col0 - Group By Operator [GBY_177] (rows=235937 width=4) + Group By Operator [GBY_181] (rows=235937 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_169] (rows=471875 width=4) + Select Operator [SEL_176] (rows=471875 width=4) Output:["_col0"] - Filter Operator [FIL_168] (rows=471875 width=228) + Filter Operator [FIL_175] (rows=471875 width=228) predicate:(_col1 > _col2) - Merge Join Operator [MERGEJOIN_456] (rows=1415625 width=228) + Merge Join Operator [MERGEJOIN_460] (rows=1415625 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_536] - Select Operator [SEL_534] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_540] + Select Operator [SEL_538] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_533] (rows=1 width=112) + Filter Operator [FIL_537] (rows=1 width=112) predicate:_col0 is not null - Group By Operator [GBY_532] (rows=1 width=112) + Group By Operator [GBY_536] (rows=1 width=112) Output:["_col0"],aggregations:["max(VALUE._col0)"] <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_531] - Group By Operator [GBY_530] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_535] + Group By Operator [GBY_534] (rows=1 width=112) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_529] (rows=11859 width=116) + Select Operator [SEL_533] (rows=11859 width=116) Output:["_col1"] - Group By Operator [GBY_528] (rows=11859 width=116) + Group By Operator [GBY_532] (rows=11859 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_64] + SHUFFLE [RS_69] PartitionCols:_col0 - Group By Operator [GBY_63] (rows=106731 width=116) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_445] (rows=18762463 width=116) - Conds:RS_59._col1=RS_512._col0(Inner),Output:["_col2","_col4"] + Group By Operator [GBY_68] (rows=106731 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col3)"],keys:_col0 + Merge Join Operator [MERGEJOIN_449] (rows=18762463 width=116) + Conds:RS_516._col0=RS_65._col1(Inner),Output:["_col0","_col3"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_512] + SHUFFLE [RS_516] PartitionCols:_col0 - Select Operator [SEL_510] (rows=80000000 width=4) + Select Operator [SEL_514] (rows=80000000 width=4) Output:["_col0"] - Filter Operator [FIL_509] (rows=80000000 width=4) + Filter Operator [FIL_513] (rows=80000000 width=4) predicate:c_customer_sk is not null - TableScan [TS_35] (rows=80000000 width=4) + TableScan [TS_39] (rows=80000000 width=4) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk"] <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_59] + SHUFFLE [RS_65] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_444] (rows=18762463 width=112) - Conds:RS_527._col0=RS_519._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_448] (rows=18762463 width=112) + Conds:RS_531._col0=RS_523._col0(Inner),Output:["_col1","_col2"] <-Map 35 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_519] + PARTITION_ONLY_SHUFFLE [RS_523] PartitionCols:_col0 - Select Operator [SEL_518] (rows=2609 width=4) + Select Operator [SEL_522] (rows=2609 width=4) Output:["_col0"] - Filter Operator [FIL_517] (rows=2609 width=8) + Filter Operator [FIL_521] (rows=2609 width=8) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) - TableScan [TS_50] (rows=73049 width=8) + TableScan [TS_57] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_527] + SHUFFLE [RS_531] PartitionCols:_col0 - Select Operator [SEL_526] (rows=525327388 width=119) + Select Operator [SEL_530] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_525] (rows=525327388 width=118) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_47] (rows=575995635 width=118) + Filter Operator [FIL_529] (rows=525327388 width=118) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_54] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_524] - Group By Operator [GBY_523] (rows=1 width=12) + BROADCAST [RS_528] + Group By Operator [GBY_527] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_522] - Group By Operator [GBY_521] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_526] + Group By Operator [GBY_525] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_520] (rows=2609 width=4) + Select Operator [SEL_524] (rows=2609 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_518] + Please refer to the previous Select Operator [SEL_522] <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_551] - Filter Operator [FIL_550] (rows=1415625 width=116) + PARTITION_ONLY_SHUFFLE [RS_555] + Filter Operator [FIL_554] (rows=1415625 width=116) predicate:_col1 is not null - Group By Operator [GBY_549] (rows=1415625 width=116) + Group By Operator [GBY_553] (rows=1415625 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_134] + SHUFFLE [RS_140] PartitionCols:_col0 - Group By Operator [GBY_133] (rows=80000000 width=116) + Group By Operator [GBY_139] (rows=80000000 width=116) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 - Merge Join Operator [MERGEJOIN_449] (rows=550080312 width=116) - Conds:RS_548._col0=RS_513._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_453] (rows=550080312 width=116) + Conds:RS_552._col0=RS_517._col0(Inner),Output:["_col1","_col2"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_513] + SHUFFLE [RS_517] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_510] + Please refer to the previous Select Operator [SEL_514] <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_548] + SHUFFLE [RS_552] PartitionCols:_col0 - Select Operator [SEL_547] (rows=550080312 width=115) + Select Operator [SEL_551] (rows=550080312 width=115) Output:["_col0","_col1"] - Filter Operator [FIL_546] (rows=550080312 width=114) - predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_178_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_178_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_178_web_sales_ws_bill_customer_sk_bloom_filter))) - TableScan [TS_123] (rows=575995635 width=114) + Filter Operator [FIL_550] (rows=550080312 width=114) + predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_182_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_182_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_182_web_sales_ws_bill_customer_sk_bloom_filter))) + TableScan [TS_129] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_545] - Group By Operator [GBY_544] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_549] + Group By Operator [GBY_548] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_415] - Group By Operator [GBY_414] (rows=1 width=12) + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_419] + Group By Operator [GBY_418] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_413] (rows=3941102 width=7) + Select Operator [SEL_417] (rows=3941102 width=7) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_453] - <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_462] - Group By Operator [GBY_461] (rows=1 width=112) + Please refer to the previous Merge Join Operator [MERGEJOIN_457] + <-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_466] + Group By Operator [GBY_465] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_459] (rows=7751875 width=112) + Select Operator [SEL_463] (rows=7751875 width=112) Output:["_col0"] - Merge Join Operator [MERGEJOIN_458] (rows=7751875 width=94) - Conds:RS_87._col1=RS_88._col0(Left Semi),Output:["_col3","_col4"] - <-Reducer 3 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_87] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_452] (rows=7751875 width=97) - Conds:RS_82._col2=RS_502._col0(Inner),Output:["_col1","_col3","_col4"] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_502] + Merge Join Operator [MERGEJOIN_462] (rows=7751875 width=94) + Conds:RS_89._col2=RS_90._col0(Left Semi),Output:["_col4","_col5"] + <-Reducer 5 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_89] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_456] (rows=7751875 width=97) + Conds:RS_491._col0=RS_85._col2(Inner),Output:["_col2","_col4","_col5"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_491] PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_501] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_82] + Please refer to the previous Group By Operator [GBY_490] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_85] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_440] (rows=7751875 width=101) - Conds:RS_482._col0=RS_470._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_470] + Merge Join Operator [MERGEJOIN_446] (rows=7751875 width=101) + Conds:RS_507._col0=RS_495._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_495] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_469] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_482] + Please refer to the previous Select Operator [SEL_494] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_507] PartitionCols:_col0 - Select Operator [SEL_481] (rows=285117831 width=127) + Select Operator [SEL_506] (rows=285117831 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_480] (rows=285117831 width=127) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_80_date_dim_d_date_sk_min) AND DynamicValue(RS_80_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_80_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=127) + Filter Operator [FIL_505] (rows=285117831 width=127) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_33_date_dim_d_date_sk_min) AND DynamicValue(RS_33_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_33_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_26] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_479] - Group By Operator [GBY_478] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_504] + Group By Operator [GBY_503] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_476] - Group By Operator [GBY_474] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_501] + Group By Operator [GBY_499] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_471] (rows=50 width=4) + Select Operator [SEL_496] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_469] + Please refer to the previous Select Operator [SEL_494] <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_88] + SHUFFLE [RS_90] PartitionCols:_col0 - Group By Operator [GBY_86] (rows=235937 width=4) + Group By Operator [GBY_88] (rows=235937 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_78] (rows=471875 width=4) + Select Operator [SEL_83] (rows=471875 width=4) Output:["_col0"] - Filter Operator [FIL_77] (rows=471875 width=228) + Filter Operator [FIL_82] (rows=471875 width=228) predicate:(_col1 > _col2) - Merge Join Operator [MERGEJOIN_454] (rows=1415625 width=228) + Merge Join Operator [MERGEJOIN_458] (rows=1415625 width=228) Conds:(Inner),Output:["_col0","_col1","_col2"] <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_535] - Please refer to the previous Select Operator [SEL_534] + PARTITION_ONLY_SHUFFLE [RS_539] + Please refer to the previous Select Operator [SEL_538] <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_516] - Filter Operator [FIL_515] (rows=1415625 width=116) + PARTITION_ONLY_SHUFFLE [RS_520] + Filter Operator [FIL_519] (rows=1415625 width=116) predicate:_col1 is not null - Group By Operator [GBY_514] (rows=1415625 width=116) + Group By Operator [GBY_518] (rows=1415625 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_42] (rows=80000000 width=116) + Group By Operator [GBY_46] (rows=80000000 width=116) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col2 - Merge Join Operator [MERGEJOIN_443] (rows=550080312 width=116) - Conds:RS_508._col0=RS_511._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_447] (rows=550080312 width=116) + Conds:RS_512._col0=RS_515._col0(Inner),Output:["_col1","_col2"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_511] + SHUFFLE [RS_515] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_510] + Please refer to the previous Select Operator [SEL_514] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_508] + SHUFFLE [RS_512] PartitionCols:_col0 - Select Operator [SEL_507] (rows=550080312 width=115) + Select Operator [SEL_511] (rows=550080312 width=115) Output:["_col0","_col1"] - Filter Operator [FIL_506] (rows=550080312 width=114) - predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_87_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_87_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_87_catalog_sales_cs_bill_customer_sk_bloom_filter))) - TableScan [TS_32] (rows=575995635 width=114) + Filter Operator [FIL_510] (rows=550080312 width=114) + predicate:(ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_89_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_89_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_89_catalog_sales_cs_bill_customer_sk_bloom_filter))) + TableScan [TS_36] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_505] - Group By Operator [GBY_504] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_509] + Group By Operator [GBY_508] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_329] - Group By Operator [GBY_328] (rows=1 width=12) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_333] + Group By Operator [GBY_332] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_327] (rows=7751875 width=6) + Select Operator [SEL_331] (rows=7751875 width=6) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_452] + Please refer to the previous Merge Join Operator [MERGEJOIN_456] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 202f68b778..a9607fd96d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[305][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[307][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -116,210 +116,210 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) Map 21 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 15 <- Map 19 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Map 8 <- Reducer 12 (BROADCAST_EDGE) +Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 11 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 - File Output Operator [FS_98] - Select Operator [SEL_97] (rows=1313165 width=380) + Reducer 4 + File Output Operator [FS_102] + Select Operator [SEL_101] (rows=1313165 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_96] (rows=1313165 width=492) + Filter Operator [FIL_100] (rows=1313165 width=492) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_305] (rows=3939496 width=492) + Merge Join Operator [MERGEJOIN_307] (rows=3939496 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_351] - Select Operator [SEL_350] (rows=1 width=112) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Filter Operator [FIL_339] (rows=3939496 width=380) + predicate:_col3 is not null + Select Operator [SEL_338] (rows=3939496 width=380) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_337] (rows=3939496 width=380) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col4, _col5, _col7 + Select Operator [SEL_336] (rows=84010488 width=843) + Output:["_col4","_col5","_col7","_col9"] + Group By Operator [GBY_335] (rows=84010488 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_38] (rows=84010488 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col6)"],keys:_col14, _col15, _col22, _col8, _col9, _col10, _col11, _col18, _col23 + Merge Join Operator [MERGEJOIN_301] (rows=138508741 width=824) + Conds:RS_310._col0, _col1=RS_35._col0, _col3(Inner),Output:["_col6","_col8","_col9","_col10","_col11","_col14","_col15","_col18","_col22","_col23"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] + PartitionCols:_col0, _col1 + Select Operator [SEL_309] (rows=57591150 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_308] (rows=57591150 width=8) + predicate:(sr_ticket_number is not null and sr_item_sk is not null) + TableScan [TS_0] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_300] (rows=84010488 width=820) + Conds:RS_30._col1, _col2=RS_31._col0, _col9(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_31] + PartitionCols:_col0, _col9 + Select Operator [SEL_26] (rows=7276996 width=724) + Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col11"] + Filter Operator [FIL_25] (rows=7276996 width=724) + predicate:(_col4 <> _col8) + Merge Join Operator [MERGEJOIN_299] (rows=7276996 width=724) + Conds:RS_328._col1=RS_23._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col6","_col8","_col9","_col10","_col11"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] + PartitionCols:_col1 + Select Operator [SEL_327] (rows=80000000 width=280) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_326] (rows=80000000 width=280) + predicate:(c_customer_sk is not null and c_current_addr_sk is not null) + TableScan [TS_9] (rows=80000000 width=280) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_298] (rows=611379 width=452) + Conds:RS_331._col2=RS_334._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] + PartitionCols:_col2 + Select Operator [SEL_330] (rows=40000000 width=363) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_329] (rows=40000000 width=276) + predicate:(ca_address_sk is not null and ca_zip is not null) + TableScan [TS_12] (rows=40000000 width=276) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col3 + Select Operator [SEL_333] (rows=155 width=267) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_332] (rows=155 width=271) + predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) + TableScan [TS_15] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_297] (rows=76612563 width=382) + Conds:RS_325._col0=RS_316._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_316] + PartitionCols:_col0 + Select Operator [SEL_314] (rows=7000 width=295) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_312] (rows=7000 width=384) + predicate:((i_color = 'orchid') and i_item_sk is not null) + TableScan [TS_6] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] + PartitionCols:_col0 + Select Operator [SEL_324] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_323] (rows=525333486 width=122) + predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_322] + Group By Operator [GBY_321] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] + Group By Operator [GBY_319] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_317] (rows=7000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_314] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_353] + Select Operator [SEL_352] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_349] (rows=1 width=120) + Filter Operator [FIL_351] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(21,6)) is not null - Group By Operator [GBY_348] (rows=1 width=120) + Group By Operator [GBY_350] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_347] - Group By Operator [GBY_346] (rows=1 width=120) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_349] + Group By Operator [GBY_348] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_345] (rows=576061174 width=932) + Select Operator [SEL_347] (rows=576061174 width=932) Output:["_col10"] - Group By Operator [GBY_344] (rows=576061174 width=932) + Group By Operator [GBY_346] (rows=576061174 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_83] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_87] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_82] (rows=576061174 width=932) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col17)"],keys:_col10, _col11, _col1, _col5, _col6, _col19, _col20, _col21, _col22, _col23 - Merge Join Operator [MERGEJOIN_304] (rows=589731269 width=928) - Conds:RS_78._col13, _col16=RS_332._col0, _col1(Inner),Output:["_col1","_col5","_col6","_col10","_col11","_col17","_col19","_col20","_col21","_col22","_col23"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + Group By Operator [GBY_86] (rows=576061174 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col7, _col8, _col11, _col15, _col16, _col19, _col20, _col21, _col22, _col23 + Merge Join Operator [MERGEJOIN_306] (rows=589731269 width=928) + Conds:RS_82._col0, _col3=RS_311._col0, _col1(Inner),Output:["_col4","_col7","_col8","_col11","_col15","_col16","_col19","_col20","_col21","_col22","_col23"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] PartitionCols:_col0, _col1 - Select Operator [SEL_330] (rows=57591150 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_329] (rows=57591150 width=8) - predicate:(sr_ticket_number is not null and sr_item_sk is not null) - TableScan [TS_23] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_78] - PartitionCols:_col13, _col16 - Merge Join Operator [MERGEJOIN_303] (rows=576061174 width=936) - Conds:RS_75._col13=RS_312._col0(Inner),Output:["_col1","_col5","_col6","_col10","_col11","_col13","_col16","_col17","_col19","_col20","_col21","_col22","_col23"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + Please refer to the previous Select Operator [SEL_309] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_305] (rows=576061174 width=936) + Conds:RS_79._col0=RS_318._col0(Inner),Output:["_col0","_col3","_col4","_col7","_col8","_col11","_col15","_col16","_col19","_col20","_col21","_col22","_col23"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_318] PartitionCols:_col0 - Select Operator [SEL_309] (rows=462000 width=384) + Select Operator [SEL_315] (rows=462000 width=384) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_307] (rows=462000 width=384) + Filter Operator [FIL_313] (rows=462000 width=384) predicate:i_item_sk is not null - TableScan [TS_3] (rows=462000 width=384) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + Please refer to the previous TableScan [TS_6] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_75] - PartitionCols:_col13 - Merge Join Operator [MERGEJOIN_302] (rows=576061174 width=555) - Conds:RS_72._col8, _col4=RS_343._col1, _col2(Inner),Output:["_col1","_col5","_col6","_col10","_col11","_col13","_col16","_col17"] + SHUFFLE [RS_79] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_304] (rows=576061174 width=555) + Conds:RS_345._col1, _col2=RS_77._col0, _col9(Inner),Output:["_col0","_col3","_col4","_col7","_col8","_col11","_col15","_col16"] <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_72] - PartitionCols:_col8, _col4 - Filter Operator [FIL_21] (rows=7276996 width=724) - predicate:(_col12 <> _col3) - Merge Join Operator [MERGEJOIN_297] (rows=7276996 width=724) - Conds:RS_18._col0=RS_328._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col10","_col11","_col12"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] - PartitionCols:_col1 - Select Operator [SEL_327] (rows=80000000 width=280) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_326] (rows=80000000 width=280) - predicate:(c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_12] (rows=80000000 width=280) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_296] (rows=611379 width=452) - Conds:RS_322._col2=RS_325._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] - PartitionCols:_col2 - Select Operator [SEL_321] (rows=40000000 width=363) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_320] (rows=40000000 width=276) - predicate:(ca_address_sk is not null and ca_zip is not null) - TableScan [TS_6] (rows=40000000 width=276) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] - PartitionCols:_col3 - Select Operator [SEL_324] (rows=155 width=267) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_323] (rows=155 width=271) - predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_9] (rows=1704 width=270) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + PARTITION_ONLY_SHUFFLE [RS_77] + PartitionCols:_col0, _col9 + Please refer to the previous Select Operator [SEL_26] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] + SHUFFLE [RS_345] PartitionCols:_col1, _col2 - Select Operator [SEL_342] (rows=525333486 width=122) + Select Operator [SEL_344] (rows=525333486 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_341] (rows=525333486 width=122) - predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_72_customer_c_customer_sk_min) AND DynamicValue(RS_72_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_72_customer_c_customer_sk_bloom_filter))) - TableScan [TS_56] (rows=575995635 width=122) + Filter Operator [FIL_343] (rows=525333486 width=122) + predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_77_customer_c_customer_sk_min) AND DynamicValue(RS_77_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_77_customer_c_customer_sk_bloom_filter))) + TableScan [TS_49] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_340] - Group By Operator [GBY_339] (rows=1 width=12) + BROADCAST [RS_342] + Group By Operator [GBY_341] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_254] - Group By Operator [GBY_253] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] - Select Operator [SEL_252] (rows=7276996 width=8) + Select Operator [SEL_224] (rows=7276996 width=4) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_21] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Filter Operator [FIL_337] (rows=3939496 width=380) - predicate:_col3 is not null - Select Operator [SEL_336] (rows=3939496 width=380) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_335] (rows=3939496 width=380) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col4, _col5, _col7 - Select Operator [SEL_334] (rows=84010488 width=843) - Output:["_col4","_col5","_col7","_col9"] - Group By Operator [GBY_333] (rows=84010488 width=843) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_36] (rows=84010488 width=843) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col12, _col13, _col20, _col6, _col7, _col8, _col9, _col16, _col21 - Merge Join Operator [MERGEJOIN_299] (rows=138508741 width=824) - Conds:RS_32._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_330] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_298] (rows=84010488 width=820) - Conds:RS_29._col1, _col2=RS_30._col0, _col9(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0, _col9 - Select Operator [SEL_22] (rows=7276996 width=724) - Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col11"] - Please refer to the previous Filter Operator [FIL_21] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_295] (rows=76612563 width=382) - Conds:RS_319._col0=RS_310._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - PartitionCols:_col0 - Select Operator [SEL_308] (rows=7000 width=295) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_306] (rows=7000 width=384) - predicate:((i_color = 'orchid') and i_item_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] - PartitionCols:_col0 - Select Operator [SEL_318] (rows=525333486 width=122) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_317] (rows=525333486 width=122) - predicate:(ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_27_item_i_item_sk_min) AND DynamicValue(RS_27_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_27_item_i_item_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_316] - Group By Operator [GBY_315] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_314] - Group By Operator [GBY_313] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_311] (rows=7000 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_308] + Please refer to the previous Select Operator [SEL_26] diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index 409e055b01..5711a529b7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -109,163 +109,163 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 14 <- Reducer 12 (BROADCAST_EDGE) -Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 15 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 15 <- Reducer 13 (BROADCAST_EDGE) +Map 6 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 13 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 16 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Reducer 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_258] - Limit [LIM_257] (rows=100 width=808) + Reducer 5 vectorized + File Output Operator [FS_259] + Limit [LIM_258] (rows=100 width=808) Number of rows:100 - Select Operator [SEL_256] (rows=4248052806 width=808) + Select Operator [SEL_257] (rows=4248052806 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] - Group By Operator [GBY_254] (rows=4248052806 width=808) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_256] + Group By Operator [GBY_255] (rows=4248052806 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_48] (rows=4248052806 width=808) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col16)","sum(_col10)"],keys:_col19, _col20, _col22, _col23 - Top N Key Operator [TNK_98] (rows=4248052806 width=807) - keys:_col19, _col20, _col22, _col23,top n:100 - Merge Join Operator [MERGEJOIN_219] (rows=4248052806 width=807) - Conds:RS_44._col3=RS_253._col0(Inner),Output:["_col5","_col10","_col16","_col19","_col20","_col22","_col23"] + Group By Operator [GBY_49] (rows=4248052806 width=808) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col8)","sum(_col19)","sum(_col13)"],keys:_col1, _col2, _col22, _col23 + Top N Key Operator [TNK_99] (rows=4248052806 width=807) + keys:_col1, _col2, _col22, _col23,top n:100 + Merge Join Operator [MERGEJOIN_220] (rows=4248052806 width=807) + Conds:RS_45._col6=RS_254._col0(Inner),Output:["_col1","_col2","_col8","_col13","_col19","_col22","_col23"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_253] + SHUFFLE [RS_254] PartitionCols:_col0 - Select Operator [SEL_252] (rows=1704 width=192) + Select Operator [SEL_253] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_251] (rows=1704 width=192) + Filter Operator [FIL_252] (rows=1704 width=192) predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=192) + TableScan [TS_39] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_218] (rows=4248052806 width=623) - Conds:RS_41._col1=RS_250._col0(Inner),Output:["_col3","_col5","_col10","_col16","_col19","_col20"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_219] (rows=4248052806 width=623) + Conds:RS_223._col0=RS_43._col1(Inner),Output:["_col1","_col2","_col6","_col8","_col13","_col19"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_249] (rows=462000 width=288) + Select Operator [SEL_222] (rows=462000 width=288) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_248] (rows=462000 width=288) + Filter Operator [FIL_221] (rows=462000 width=288) predicate:i_item_sk is not null - TableScan [TS_29] (rows=462000 width=288) + TableScan [TS_0] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_41] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_217] (rows=4248052806 width=343) - Conds:RS_38._col1, _col2, _col4=RS_39._col6, _col7, _col8(Inner),Output:["_col1","_col3","_col5","_col10","_col16"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_39] + Merge Join Operator [MERGEJOIN_218] (rows=4248052806 width=343) + Conds:RS_35._col1, _col2, _col4=RS_36._col6, _col7, _col8(Inner),Output:["_col1","_col3","_col5","_col10","_col16"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_36] PartitionCols:_col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_216] (rows=1893811716 width=235) - Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_25] + Merge Join Operator [MERGEJOIN_217] (rows=1893811716 width=235) + Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_28] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_214] (rows=54418158 width=119) - Conds:RS_244._col0=RS_228._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] + Merge Join Operator [MERGEJOIN_215] (rows=54418158 width=119) + Conds:RS_248._col0=RS_232._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] PartitionCols:_col0 - Select Operator [SEL_224] (rows=351 width=4) + Select Operator [SEL_228] (rows=351 width=4) Output:["_col0"] - Filter Operator [FIL_221] (rows=351 width=12) + Filter Operator [FIL_225] (rows=351 width=12) predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] PartitionCols:_col0 - Select Operator [SEL_243] (rows=285117831 width=123) + Select Operator [SEL_247] (rows=285117831 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_242] (rows=285117831 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=287989836 width=123) + Filter Operator [FIL_246] (rows=285117831 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_241] - Group By Operator [GBY_240] (rows=1 width=12) + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_245] + Group By Operator [GBY_244] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_234] - Group By Operator [GBY_232] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_229] (rows=351 width=4) + Select Operator [SEL_233] (rows=351 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_224] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_26] + Please refer to the previous Select Operator [SEL_228] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_215] (rows=9402909 width=100) - Conds:RS_247._col0=RS_230._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_230] + Merge Join Operator [MERGEJOIN_216] (rows=9402909 width=100) + Conds:RS_251._col0=RS_234._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] PartitionCols:_col0 - Select Operator [SEL_225] (rows=351 width=4) + Select Operator [SEL_229] (rows=351 width=4) Output:["_col0"] - Filter Operator [FIL_222] (rows=351 width=12) + Filter Operator [FIL_226] (rows=351 width=12) predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10 and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] + Please refer to the previous TableScan [TS_6] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_246] (rows=53632139 width=123) + Select Operator [SEL_250] (rows=53632139 width=123) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_245] (rows=53632139 width=123) + Filter Operator [FIL_249] (rows=53632139 width=123) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=123) + TableScan [TS_15] (rows=57591150 width=123) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_38] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_35] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_213] (rows=13737330 width=8) - Conds:RS_239._col0=RS_226._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_226] + Merge Join Operator [MERGEJOIN_214] (rows=13737330 width=8) + Conds:RS_243._col0=RS_230._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_223] (rows=50 width=4) + Select Operator [SEL_227] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_220] (rows=50 width=12) + Filter Operator [FIL_224] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 4) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + Please refer to the previous TableScan [TS_6] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] PartitionCols:_col0 - Select Operator [SEL_238] (rows=501694138 width=126) + Select Operator [SEL_242] (rows=501694138 width=126) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_237] (rows=501694138 width=126) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=126) + Filter Operator [FIL_241] (rows=501694138 width=126) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_33_d1_d_date_sk_min) AND DynamicValue(RS_33_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_33_d1_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=126) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_240] + Group By Operator [GBY_239] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_233] - Group By Operator [GBY_231] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_227] (rows=50 width=4) + Select Operator [SEL_231] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_223] + Please refer to the previous Select Operator [SEL_227] diff --git a/ql/src/test/results/clientpositive/perf/tez/query26.q.out b/ql/src/test/results/clientpositive/perf/tez/query26.q.out index 0daa8cc691..5f16848aae 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query26.q.out @@ -53,109 +53,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_130] - Limit [LIM_129] (rows=100 width=444) + Reducer 4 vectorized + File Output Operator [FS_131] + Limit [LIM_130] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_128] (rows=310774 width=444) + Select Operator [SEL_129] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] - Select Operator [SEL_126] (rows=310774 width=444) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_125] (rows=310774 width=476) + Group By Operator [GBY_126] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_28] (rows=462000 width=476) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_58] (rows=809521 width=100) - keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_104] (rows=809521 width=100) - Conds:RS_24._col2=RS_124._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_124] + Group By Operator [GBY_29] (rows=462000 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col9)","count(_col9)","sum(_col8)","count(_col8)"],keys:_col1 + Top N Key Operator [TNK_59] (rows=809521 width=100) + keys:_col1,top n:100 + Merge Join Operator [MERGEJOIN_105] (rows=809521 width=100) + Conds:RS_108._col0=RS_26._col2(Inner),Output:["_col1","_col6","_col7","_col8","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_123] (rows=462000 width=104) + Select Operator [SEL_107] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=462000 width=104) + Filter Operator [FIL_106] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_103] (rows=809521 width=4) - Conds:RS_21._col3=RS_121._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] + Merge Join Operator [MERGEJOIN_104] (rows=809521 width=4) + Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_120] (rows=2300 width=4) + Select Operator [SEL_124] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_119] (rows=2300 width=174) + Filter Operator [FIL_123] (rows=2300 width=174) predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) - TableScan [TS_9] (rows=2300 width=174) + TableScan [TS_12] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_102] (rows=809521 width=4) - Conds:RS_18._col0=RS_118._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + Merge Join Operator [MERGEJOIN_103] (rows=809521 width=4) + Conds:RS_18._col0=RS_122._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_117] (rows=652 width=4) + Select Operator [SEL_121] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_116] (rows=652 width=8) + Filter Operator [FIL_120] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_9] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_101] (rows=2283326 width=135) - Conds:RS_115._col1=RS_107._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_102] (rows=2283326 width=135) + Conds:RS_119._col1=RS_111._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_106] (rows=14776 width=4) + Select Operator [SEL_110] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_105] (rows=14776 width=268) + Filter Operator [FIL_109] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=268) + TableScan [TS_6] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] PartitionCols:_col1 - Select Operator [SEL_114] (rows=283691050 width=354) + Select Operator [SEL_118] (rows=283691050 width=354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_113] (rows=283691050 width=354) + Filter Operator [FIL_117] (rows=283691050 width=354) predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_bill_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=354) + TableScan [TS_3] (rows=287989836 width=354) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=14776 width=4) + Select Operator [SEL_112] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] + Please refer to the previous Select Operator [SEL_110] diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out index 78037b08f0..6469b8837e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -57,111 +57,111 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_131] - Limit [LIM_130] (rows=100 width=538) + Reducer 4 vectorized + File Output Operator [FS_132] + Limit [LIM_131] (rows=100 width=538) Number of rows:100 - Select Operator [SEL_129] (rows=4281825 width=538) + Select Operator [SEL_130] (rows=4281825 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - Select Operator [SEL_127] (rows=4281825 width=538) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + Select Operator [SEL_128] (rows=4281825 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_126] (rows=4281825 width=570) + Group By Operator [GBY_127] (rows=4281825 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_29] (rows=4281825 width=570) + Group By Operator [GBY_30] (rows=4281825 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Select Operator [SEL_27] (rows=1427275 width=186) + Select Operator [SEL_28] (rows=1427275 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_59] (rows=1427275 width=186) - keys:_col13, _col11,top n:100 - Merge Join Operator [MERGEJOIN_105] (rows=1427275 width=186) - Conds:RS_24._col1=RS_125._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + Top N Key Operator [TNK_60] (rows=1427275 width=186) + keys:_col1, _col13,top n:100 + Merge Join Operator [MERGEJOIN_106] (rows=1427275 width=186) + Conds:RS_109._col0=RS_26._col1(Inner),Output:["_col1","_col6","_col7","_col8","_col9","_col13"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_109] PartitionCols:_col0 - Select Operator [SEL_124] (rows=462000 width=104) + Select Operator [SEL_108] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_123] (rows=462000 width=104) + Filter Operator [FIL_107] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_104] (rows=1427275 width=90) - Conds:RS_21._col3=RS_122._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] + Merge Join Operator [MERGEJOIN_105] (rows=1427275 width=90) + Conds:RS_21._col3=RS_126._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_121] (rows=209 width=90) + Select Operator [SEL_125] (rows=209 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_120] (rows=209 width=90) + Filter Operator [FIL_124] (rows=209 width=90) predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=90) + TableScan [TS_12] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_103] (rows=1441779 width=4) - Conds:RS_18._col0=RS_119._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] + Merge Join Operator [MERGEJOIN_104] (rows=1441779 width=4) + Conds:RS_18._col0=RS_123._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_118] (rows=652 width=4) + Select Operator [SEL_122] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_117] (rows=652 width=8) + Filter Operator [FIL_121] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_9] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_102] (rows=4037920 width=4) - Conds:RS_116._col2=RS_108._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_108] + Merge Join Operator [MERGEJOIN_103] (rows=4037920 width=4) + Conds:RS_120._col2=RS_112._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_112] PartitionCols:_col0 - Select Operator [SEL_107] (rows=14776 width=4) + Select Operator [SEL_111] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_106] (rows=14776 width=268) + Filter Operator [FIL_110] (rows=14776 width=268) predicate:((cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and (cd_gender = 'M') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=268) + TableScan [TS_6] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_120] PartitionCols:_col2 - Select Operator [SEL_115] (rows=501690006 width=340) + Select Operator [SEL_119] (rows=501690006 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_114] (rows=501690006 width=340) + Filter Operator [FIL_118] (rows=501690006 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=340) + TableScan [TS_3] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_113] - Group By Operator [GBY_112] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_117] + Group By Operator [GBY_116] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=14776 width=4) + Select Operator [SEL_113] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_107] + Please refer to the previous Select Operator [SEL_111] diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index 00b9fbaf0c..37fcaf3fb3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -108,165 +108,163 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 7 (BROADCAST_EDGE) -Map 8 <- Reducer 14 (BROADCAST_EDGE) -Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 17 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Map 11 <- Reducer 15 (BROADCAST_EDGE) +Reducer 10 <- Map 18 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_249] - Limit [LIM_248] (rows=100 width=496) + File Output Operator [FS_250] + Limit [LIM_249] (rows=100 width=496) Number of rows:100 - Select Operator [SEL_247] (rows=21091879 width=496) + Select Operator [SEL_248] (rows=21091879 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_246] - Group By Operator [GBY_245] (rows=21091879 width=496) + SHUFFLE [RS_247] + Group By Operator [GBY_246] (rows=21091879 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_49] + SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_48] (rows=21091879 width=496) + Group By Operator [GBY_49] (rows=21091879 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6, _col7, _col22, _col23 - Top N Key Operator [TNK_96] (rows=4156223234 width=483) + Top N Key Operator [TNK_97] (rows=4156223234 width=483) keys:_col6, _col7, _col22, _col23,top n:100 - Merge Join Operator [MERGEJOIN_210] (rows=4156223234 width=483) - Conds:RS_44._col2, _col1=RS_45._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_45] + Merge Join Operator [MERGEJOIN_211] (rows=4156223234 width=483) + Conds:RS_45._col2, _col1=RS_46._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col11, _col12 - Select Operator [SEL_40] (rows=21091879 width=484) - Output:["_col1","_col2","_col8","_col11","_col12","_col14","_col17","_col18"] - Merge Join Operator [MERGEJOIN_209] (rows=21091879 width=484) - Conds:RS_37._col3=RS_244._col0(Inner),Output:["_col5","_col8","_col9","_col11","_col14","_col15","_col17","_col18"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] - PartitionCols:_col0 - Select Operator [SEL_243] (rows=1704 width=192) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_242] (rows=1704 width=192) - predicate:s_store_sk is not null - TableScan [TS_25] (rows=1704 width=192) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_208] (rows=21091879 width=298) - Conds:RS_34._col1=RS_241._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col11","_col14","_col15"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] - PartitionCols:_col0 - Select Operator [SEL_240] (rows=462000 width=288) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_239] (rows=462000 width=288) - predicate:i_item_sk is not null - TableScan [TS_22] (rows=462000 width=288) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_207] (rows=21091879 width=18) - Conds:RS_31._col1, _col2, _col4=RS_32._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_206] (rows=5384572 width=13) - Conds:RS_238._col0=RS_228._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col0 - Select Operator [SEL_225] (rows=201 width=4) - Output:["_col0"] - Filter Operator [FIL_223] (rows=201 width=12) - predicate:((d_year = 1999) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=12) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0 - Select Operator [SEL_237] (rows=53632139 width=19) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_236] (rows=53632139 width=19) - predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_12] (rows=57591150 width=19) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_205] (rows=13737330 width=8) - Conds:RS_235._col0=RS_226._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - PartitionCols:_col0 - Select Operator [SEL_224] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_222] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) - Please refer to the previous TableScan [TS_9] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - PartitionCols:_col0 - Select Operator [SEL_234] (rows=501694138 width=23) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_233] (rows=501694138 width=23) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=23) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_232] - Group By Operator [GBY_231] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] - Group By Operator [GBY_229] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_227] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_224] + Merge Join Operator [MERGEJOIN_210] (rows=21091879 width=484) + Conds:RS_38._col6=RS_245._col0(Inner),Output:["_col1","_col2","_col8","_col11","_col12","_col14","_col17","_col18"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + PartitionCols:_col0 + Select Operator [SEL_244] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_243] (rows=1704 width=192) + predicate:s_store_sk is not null + TableScan [TS_32] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_209] (rows=21091879 width=298) + Conds:RS_225._col0=RS_36._col1(Inner),Output:["_col1","_col2","_col6","_col8","_col11","_col12","_col14"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_224] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_223] (rows=462000 width=288) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_208] (rows=21091879 width=18) + Conds:RS_28._col1, _col2, _col4=RS_29._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_206] (rows=13737330 width=8) + Conds:RS_239._col0=RS_230._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + PartitionCols:_col0 + Select Operator [SEL_228] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_226] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_239] + PartitionCols:_col0 + Select Operator [SEL_238] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_237] (rows=501694138 width=23) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_26_d1_d_date_sk_min) AND DynamicValue(RS_26_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_26_d1_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_231] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_228] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_207] (rows=5384572 width=13) + Conds:RS_242._col0=RS_232._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + PartitionCols:_col0 + Select Operator [SEL_229] (rows=201 width=4) + Output:["_col0"] + Filter Operator [FIL_227] (rows=201 width=12) + predicate:((d_year = 1999) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) + Please refer to the previous TableScan [TS_12] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] + PartitionCols:_col0 + Select Operator [SEL_241] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_240] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_15] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] + SHUFFLE [RS_45] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_204] (rows=7638375 width=10) - Conds:RS_221._col0=RS_213._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_205] (rows=7638375 width=10) + Conds:RS_222._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_213] + PARTITION_ONLY_SHUFFLE [RS_214] PartitionCols:_col0 - Select Operator [SEL_212] (rows=1957 width=4) + Select Operator [SEL_213] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_211] (rows=1957 width=8) + Filter Operator [FIL_212] (rows=1957 width=8) predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=8) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_220] (rows=285117831 width=15) + Select Operator [SEL_221] (rows=285117831 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_219] (rows=285117831 width=15) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) + Filter Operator [FIL_220] (rows=285117831 width=15) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_43_d3_d_date_sk_min) AND DynamicValue(RS_43_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_43_d3_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=15) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_218] - Group By Operator [GBY_217] (rows=1 width=12) + BROADCAST [RS_219] + Group By Operator [GBY_218] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_216] - Group By Operator [GBY_215] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_217] + Group By Operator [GBY_216] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_214] (rows=1957 width=4) + Select Operator [SEL_215] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_212] + Please refer to the previous Select Operator [SEL_213] diff --git a/ql/src/test/results/clientpositive/perf/tez/query30.q.out b/ql/src/test/results/clientpositive/perf/tez/query30.q.out index 5b1e1fa685..f98b5fd941 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query30.q.out @@ -87,141 +87,141 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_216] - Limit [LIM_215] (rows=100 width=942) + File Output Operator [FS_218] + Limit [LIM_217] (rows=100 width=942) Number of rows:100 - Select Operator [SEL_214] (rows=704993 width=930) + Select Operator [SEL_216] (rows=704993 width=930) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_66] - Select Operator [SEL_65] (rows=704993 width=930) + SHUFFLE [RS_68] + Select Operator [SEL_67] (rows=704993 width=930) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Top N Key Operator [TNK_104] (rows=704993 width=942) + Top N Key Operator [TNK_106] (rows=704993 width=942) keys:_col1, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col17,top n:100 - Merge Join Operator [MERGEJOIN_181] (rows=704993 width=942) - Conds:RS_62._col0=RS_63._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17"] + Merge Join Operator [MERGEJOIN_183] (rows=704993 width=942) + Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_62] + SHUFFLE [RS_64] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_175] (rows=1568628 width=834) - Conds:RS_184._col2=RS_191._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_177] (rows=1568628 width=834) + Conds:RS_186._col2=RS_193._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_193] PartitionCols:_col0 - Select Operator [SEL_188] (rows=784314 width=4) + Select Operator [SEL_190] (rows=784314 width=4) Output:["_col0"] - Filter Operator [FIL_185] (rows=784314 width=90) + Filter Operator [FIL_187] (rows=784314 width=90) predicate:((ca_state = 'IL') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + SHUFFLE [RS_186] PartitionCols:_col2 - Select Operator [SEL_183] (rows=80000000 width=849) + Select Operator [SEL_185] (rows=80000000 width=849) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_182] (rows=80000000 width=849) + Filter Operator [FIL_184] (rows=80000000 width=849) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=849) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_day","c_birth_month","c_birth_year","c_birth_country","c_login","c_email_address","c_last_review_date"] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_63] + SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_58] (rows=704993 width=227) + Select Operator [SEL_60] (rows=704993 width=227) Output:["_col0","_col2"] - Filter Operator [FIL_57] (rows=704993 width=227) + Filter Operator [FIL_59] (rows=704993 width=227) predicate:(_col2 > _col3) - Merge Join Operator [MERGEJOIN_180] (rows=2114980 width=227) - Conds:RS_207._col1=RS_213._col1(Inner),Output:["_col0","_col2","_col3"] + Merge Join Operator [MERGEJOIN_182] (rows=2114980 width=227) + Conds:RS_209._col1=RS_215._col1(Inner),Output:["_col0","_col2","_col3"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + SHUFFLE [RS_215] PartitionCols:_col1 - Select Operator [SEL_212] (rows=6 width=198) + Select Operator [SEL_214] (rows=6 width=198) Output:["_col0","_col1"] - Filter Operator [FIL_211] (rows=6 width=206) + Filter Operator [FIL_213] (rows=6 width=206) predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_210] (rows=6 width=206) + Group By Operator [GBY_212] (rows=6 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 - Select Operator [SEL_209] (rows=2537976 width=201) + Select Operator [SEL_211] (rows=2537976 width=201) Output:["_col0","_col2"] - Group By Operator [GBY_208] (rows=2537976 width=201) + Group By Operator [GBY_210] (rows=2537976 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=3923529 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_179] (rows=3923529 width=184) - Conds:RS_40._col2=RS_193._col0(Inner),Output:["_col1","_col3","_col6"] + Group By Operator [GBY_46] (rows=3923529 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_181] (rows=3923529 width=184) + Conds:RS_195._col0=RS_43._col2(Inner),Output:["_col1","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_190] (rows=40000000 width=90) + Select Operator [SEL_192] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_187] (rows=40000000 width=90) + Filter Operator [FIL_189] (rows=40000000 width=90) predicate:(ca_address_sk is not null and ca_state is not null) Please refer to the previous TableScan [TS_3] <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_43] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_178] (rows=3923529 width=101) - Conds:RS_199._col0=RS_203._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_180] (rows=3923529 width=101) + Conds:RS_201._col0=RS_205._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_197] (rows=13130761 width=118) + Select Operator [SEL_199] (rows=13130761 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_195] (rows=13130761 width=118) + Filter Operator [FIL_197] (rows=13130761 width=118) predicate:(wr_returned_date_sk is not null and wr_returning_addr_sk is not null) - TableScan [TS_6] (rows=14398467 width=118) + TableScan [TS_9] (rows=14398467 width=118) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_returning_customer_sk","wr_returning_addr_sk","wr_return_amt"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + SHUFFLE [RS_205] PartitionCols:_col0 - Select Operator [SEL_201] (rows=652 width=4) + Select Operator [SEL_203] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_200] (rows=652 width=8) + Filter Operator [FIL_202] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=8) + TableScan [TS_12] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_209] PartitionCols:_col1 - Filter Operator [FIL_206] (rows=2114980 width=201) + Filter Operator [FIL_208] (rows=2114980 width=201) predicate:_col2 is not null - Select Operator [SEL_205] (rows=2114980 width=201) + Select Operator [SEL_207] (rows=2114980 width=201) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_204] (rows=2114980 width=201) + Group By Operator [GBY_206] (rows=2114980 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_23] + SHUFFLE [RS_24] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=3746772 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_177] (rows=3746772 width=184) - Conds:RS_18._col2=RS_192._col0(Inner),Output:["_col1","_col3","_col6"] + Group By Operator [GBY_23] (rows=3746772 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_179] (rows=3746772 width=184) + Conds:RS_194._col0=RS_20._col2(Inner),Output:["_col1","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_192] + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_189] (rows=40000000 width=90) + Select Operator [SEL_191] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=40000000 width=90) + Filter Operator [FIL_188] (rows=40000000 width=90) predicate:(ca_address_sk is not null and ca_state is not null) Please refer to the previous TableScan [TS_3] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_18] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_176] (rows=3746772 width=101) - Conds:RS_198._col0=RS_202._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_178] (rows=3746772 width=101) + Conds:RS_200._col0=RS_204._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_196] (rows=12539214 width=118) + Select Operator [SEL_198] (rows=12539214 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_194] (rows=12539214 width=118) + Filter Operator [FIL_196] (rows=12539214 width=118) predicate:(wr_returning_customer_sk is not null and wr_returned_date_sk is not null and wr_returning_addr_sk is not null) - Please refer to the previous TableScan [TS_6] + Please refer to the previous TableScan [TS_9] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + SHUFFLE [RS_204] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_201] + Please refer to the previous Select Operator [SEL_203] diff --git a/ql/src/test/results/clientpositive/perf/tez/query31.q.out b/ql/src/test/results/clientpositive/perf/tez/query31.q.out index 0359a5e63f..16e2268190 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query31.q.out @@ -113,376 +113,376 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 33 <- Reducer 13 (BROADCAST_EDGE) -Map 34 <- Reducer 17 (BROADCAST_EDGE) -Map 35 <- Reducer 23 (BROADCAST_EDGE) -Map 36 <- Reducer 27 (BROADCAST_EDGE) -Map 37 <- Reducer 31 (BROADCAST_EDGE) -Reducer 10 <- Map 33 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Map 32 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Map 19 <- Reducer 22 (BROADCAST_EDGE) +Map 33 <- Reducer 24 (BROADCAST_EDGE) +Map 34 <- Reducer 26 (BROADCAST_EDGE) +Map 35 <- Reducer 28 (BROADCAST_EDGE) +Map 36 <- Reducer 30 (BROADCAST_EDGE) +Map 37 <- Reducer 32 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 34 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 15 <- Map 32 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 35 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 19 <- Map 32 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 23 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 36 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 25 <- Map 32 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Map 37 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 29 <- Map 32 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 32 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 1 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 21 (SIMPLE_EDGE), Map 33 (SIMPLE_EDGE) +Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 21 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 26 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 21 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 28 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Map 21 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 21 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 32 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 - File Output Operator [FS_139] - Select Operator [SEL_138] (rows=110 width=550) + Reducer 6 + File Output Operator [FS_145] + Select Operator [SEL_144] (rows=110 width=550) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_136] (rows=110 width=778) + Filter Operator [FIL_142] (rows=110 width=778) predicate:(CASE WHEN ((_col9 > 0)) THEN (CASE WHEN (_col7) THEN (((_col4 / _col6) > (_col13 / _col9))) ELSE (false) END) ELSE (false) END and CASE WHEN ((_col11 > 0)) THEN (CASE WHEN (_col2) THEN (((_col6 / _col1) > (_col9 / _col11))) ELSE (false) END) ELSE (false) END) - Merge Join Operator [MERGEJOIN_450] (rows=440 width=778) - Conds:RS_133._col0=RS_134._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col9","_col11","_col13"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_134] + Merge Join Operator [MERGEJOIN_456] (rows=440 width=778) + Conds:RS_139._col0=RS_140._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col9","_col11","_col13"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_140] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_448] (rows=1605 width=434) - Conds:RS_123._col0=RS_538._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_123] + Merge Join Operator [MERGEJOIN_454] (rows=1605 width=434) + Conds:RS_129._col0=RS_544._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_129] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_447] (rows=1605 width=322) - Conds:RS_524._col0=RS_531._col0(Inner),Output:["_col0","_col1","_col3"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_524] + Merge Join Operator [MERGEJOIN_453] (rows=1605 width=322) + Conds:RS_530._col0=RS_537._col0(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_530] PartitionCols:_col0 - Group By Operator [GBY_523] (rows=1605 width=210) + Group By Operator [GBY_529] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_77] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_81] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=33705 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_441] (rows=37399561 width=139) - Conds:RS_72._col1=RS_497._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_497] + Group By Operator [GBY_80] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_447] (rows=37399561 width=139) + Conds:RS_462._col0=RS_77._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_462] PartitionCols:_col0 - Select Operator [SEL_493] (rows=40000000 width=102) + Select Operator [SEL_458] (rows=40000000 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_492] (rows=40000000 width=102) + Filter Operator [FIL_457] (rows=40000000 width=102) predicate:(ca_address_sk is not null and ca_county is not null) - TableScan [TS_6] (rows=40000000 width=102) + TableScan [TS_0] (rows=40000000 width=102) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_72] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_77] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_440] (rows=37399561 width=42) - Conds:RS_522._col0=RS_469._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_469] + Merge Join Operator [MERGEJOIN_446] (rows=37399561 width=42) + Conds:RS_528._col0=RS_483._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_483] PartitionCols:_col0 - Select Operator [SEL_460] (rows=130 width=4) + Select Operator [SEL_474] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_454] (rows=130 width=12) + Filter Operator [FIL_468] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_522] + SHUFFLE [RS_528] PartitionCols:_col0 - Select Operator [SEL_521] (rows=525327191 width=114) + Select Operator [SEL_527] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_520] (rows=525327191 width=114) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_60] (rows=575995635 width=114) + Filter Operator [FIL_526] (rows=525327191 width=114) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_66] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_519] - Group By Operator [GBY_518] (rows=1 width=12) + <-Reducer 28 [BROADCAST_EDGE] vectorized + BROADCAST [RS_525] + Group By Operator [GBY_524] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_484] - Group By Operator [GBY_478] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_498] + Group By Operator [GBY_492] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_470] (rows=130 width=4) + Select Operator [SEL_484] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_460] - <-Reducer 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_531] + Please refer to the previous Select Operator [SEL_474] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_537] PartitionCols:_col0 - Group By Operator [GBY_530] (rows=1605 width=210) + Group By Operator [GBY_536] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_97] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_102] PartitionCols:_col0 - Group By Operator [GBY_96] (rows=33705 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_443] (rows=37399561 width=139) - Conds:RS_92._col1=RS_498._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_498] + Group By Operator [GBY_101] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_449] (rows=37399561 width=139) + Conds:RS_463._col0=RS_98._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_463] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_92] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_98] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_442] (rows=37399561 width=42) - Conds:RS_529._col0=RS_471._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_471] + Merge Join Operator [MERGEJOIN_448] (rows=37399561 width=42) + Conds:RS_535._col0=RS_485._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_485] PartitionCols:_col0 - Select Operator [SEL_461] (rows=130 width=4) + Select Operator [SEL_475] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_455] (rows=130 width=12) + Filter Operator [FIL_469] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 1) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_529] + SHUFFLE [RS_535] PartitionCols:_col0 - Select Operator [SEL_528] (rows=525327191 width=114) + Select Operator [SEL_534] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_527] (rows=525327191 width=114) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_80] (rows=575995635 width=114) + Filter Operator [FIL_533] (rows=525327191 width=114) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_94_date_dim_d_date_sk_min) AND DynamicValue(RS_94_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_94_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_87] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_526] - Group By Operator [GBY_525] (rows=1 width=12) + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_532] + Group By Operator [GBY_531] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_485] - Group By Operator [GBY_479] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_499] + Group By Operator [GBY_493] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_472] (rows=130 width=4) + Select Operator [SEL_486] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_461] - <-Reducer 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_538] + Please refer to the previous Select Operator [SEL_475] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_544] PartitionCols:_col0 - Group By Operator [GBY_537] (rows=1605 width=210) + Group By Operator [GBY_543] (rows=1605 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_117] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_123] PartitionCols:_col0 - Group By Operator [GBY_116] (rows=33705 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_445] (rows=37399561 width=139) - Conds:RS_112._col1=RS_499._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_499] + Group By Operator [GBY_122] (rows=33705 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_451] (rows=37399561 width=139) + Conds:RS_464._col0=RS_119._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_464] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_112] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_119] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_444] (rows=37399561 width=42) - Conds:RS_536._col0=RS_473._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_473] + Merge Join Operator [MERGEJOIN_450] (rows=37399561 width=42) + Conds:RS_542._col0=RS_487._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_487] PartitionCols:_col0 - Select Operator [SEL_462] (rows=130 width=4) + Select Operator [SEL_476] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_456] (rows=130 width=12) + Filter Operator [FIL_470] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 3) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_536] + SHUFFLE [RS_542] PartitionCols:_col0 - Select Operator [SEL_535] (rows=525327191 width=114) + Select Operator [SEL_541] (rows=525327191 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_534] (rows=525327191 width=114) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_110_date_dim_d_date_sk_min) AND DynamicValue(RS_110_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_110_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_100] (rows=575995635 width=114) + Filter Operator [FIL_540] (rows=525327191 width=114) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_115_date_dim_d_date_sk_min) AND DynamicValue(RS_115_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_115_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_108] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_533] - Group By Operator [GBY_532] (rows=1 width=12) + <-Reducer 32 [BROADCAST_EDGE] vectorized + BROADCAST [RS_539] + Group By Operator [GBY_538] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_486] - Group By Operator [GBY_480] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_500] + Group By Operator [GBY_494] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_474] (rows=130 width=4) + Select Operator [SEL_488] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_462] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_133] + Please refer to the previous Select Operator [SEL_476] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_139] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_449] (rows=440 width=442) - Conds:RS_130._col0=RS_517._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col6","_col7"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_517] + Merge Join Operator [MERGEJOIN_455] (rows=440 width=442) + Conds:RS_136._col0=RS_523._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col6","_col7"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_523] PartitionCols:_col0 - Select Operator [SEL_516] (rows=440 width=214) + Select Operator [SEL_522] (rows=440 width=214) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_515] (rows=440 width=210) + Group By Operator [GBY_521] (rows=440 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_57] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_60] PartitionCols:_col0 - Group By Operator [GBY_56] (rows=3960 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_439] (rows=10246882 width=209) - Conds:RS_52._col1=RS_496._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_496] + Group By Operator [GBY_59] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_445] (rows=10246882 width=209) + Conds:RS_461._col0=RS_56._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_461] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_438] (rows=10246882 width=115) - Conds:RS_514._col0=RS_467._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_467] + Merge Join Operator [MERGEJOIN_444] (rows=10246882 width=115) + Conds:RS_520._col0=RS_481._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_481] PartitionCols:_col0 - Select Operator [SEL_459] (rows=130 width=4) + Select Operator [SEL_473] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_453] (rows=130 width=12) + Filter Operator [FIL_467] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_514] + SHUFFLE [RS_520] PartitionCols:_col0 - Select Operator [SEL_513] (rows=143931246 width=119) + Select Operator [SEL_519] (rows=143931246 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_512] (rows=143931246 width=119) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_40] (rows=144002668 width=119) + Filter Operator [FIL_518] (rows=143931246 width=119) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_511] - Group By Operator [GBY_510] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_517] + Group By Operator [GBY_516] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_483] - Group By Operator [GBY_477] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_497] + Group By Operator [GBY_491] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_468] (rows=130 width=4) + Select Operator [SEL_482] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_459] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_130] + Please refer to the previous Select Operator [SEL_473] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_136] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_446] (rows=440 width=326) - Conds:RS_502._col0=RS_509._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_509] - PartitionCols:_col0 - Group By Operator [GBY_508] (rows=440 width=210) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0 - Group By Operator [GBY_36] (rows=3960 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_437] (rows=10246882 width=209) - Conds:RS_32._col1=RS_495._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_495] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_436] (rows=10246882 width=115) - Conds:RS_507._col0=RS_465._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_465] - PartitionCols:_col0 - Select Operator [SEL_458] (rows=130 width=4) - Output:["_col0"] - Filter Operator [FIL_452] (rows=130 width=12) - predicate:((d_year = 2000) and (d_qoy = 3) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_507] - PartitionCols:_col0 - Select Operator [SEL_506] (rows=143931246 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_505] (rows=143931246 width=119) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=144002668 width=119) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_504] - Group By Operator [GBY_503] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_482] - Group By Operator [GBY_476] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_466] (rows=130 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_458] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_502] + Merge Join Operator [MERGEJOIN_452] (rows=440 width=326) + Conds:RS_508._col0=RS_515._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_508] PartitionCols:_col0 - Select Operator [SEL_501] (rows=440 width=214) + Select Operator [SEL_507] (rows=440 width=214) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_500] (rows=440 width=210) + Group By Operator [GBY_506] (rows=440 width=210) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=3960 width=210) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 - Merge Join Operator [MERGEJOIN_435] (rows=10246882 width=209) - Conds:RS_12._col1=RS_494._col0(Inner),Output:["_col2","_col5"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_494] + Group By Operator [GBY_17] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_441] (rows=10246882 width=209) + Conds:RS_459._col0=RS_14._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_459] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_493] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Please refer to the previous Select Operator [SEL_458] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_434] (rows=10246882 width=115) - Conds:RS_491._col0=RS_463._col0(Inner),Output:["_col1","_col2"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_463] + Merge Join Operator [MERGEJOIN_440] (rows=10246882 width=115) + Conds:RS_505._col0=RS_477._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_477] PartitionCols:_col0 - Select Operator [SEL_457] (rows=130 width=4) + Select Operator [SEL_471] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_451] (rows=130 width=12) + Filter Operator [FIL_465] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 1) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_491] + Please refer to the previous TableScan [TS_6] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_505] PartitionCols:_col0 - Select Operator [SEL_490] (rows=143931246 width=119) + Select Operator [SEL_504] (rows=143931246 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_489] (rows=143931246 width=119) + Filter Operator [FIL_503] (rows=143931246 width=119) predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + TableScan [TS_3] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_488] - Group By Operator [GBY_487] (rows=1 width=12) + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_502] + Group By Operator [GBY_501] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_481] - Group By Operator [GBY_475] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_495] + Group By Operator [GBY_489] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_464] (rows=130 width=4) + Select Operator [SEL_478] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_457] + Please refer to the previous Select Operator [SEL_471] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_515] + PartitionCols:_col0 + Group By Operator [GBY_514] (rows=440 width=210) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Group By Operator [GBY_38] (rows=3960 width=210) + Output:["_col0","_col1"],aggregations:["sum(_col4)"],keys:_col1 + Merge Join Operator [MERGEJOIN_443] (rows=10246882 width=209) + Conds:RS_460._col0=RS_35._col1(Inner),Output:["_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_460] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_458] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_442] (rows=10246882 width=115) + Conds:RS_513._col0=RS_479._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_479] + PartitionCols:_col0 + Select Operator [SEL_472] (rows=130 width=4) + Output:["_col0"] + Filter Operator [FIL_466] (rows=130 width=12) + predicate:((d_year = 2000) and (d_qoy = 3) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_513] + PartitionCols:_col0 + Select Operator [SEL_512] (rows=143931246 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_511] (rows=143931246 width=119) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=144002668 width=119) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_510] + Group By Operator [GBY_509] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_496] + Group By Operator [GBY_490] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_480] (rows=130 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_472] diff --git a/ql/src/test/results/clientpositive/perf/tez/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/query33.q.out index aea1088e31..839100e78c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query33.q.out @@ -163,27 +163,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 18 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Map 27 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 20 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -191,226 +191,220 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_359] - Limit [LIM_358] (rows=59 width=115) + File Output Operator [FS_362] + Limit [LIM_361] (rows=59 width=115) Number of rows:100 - Select Operator [SEL_357] (rows=59 width=115) + Select Operator [SEL_360] (rows=59 width=115) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_356] - Top N Key Operator [TNK_355] (rows=59 width=115) + SHUFFLE [RS_359] + Top N Key Operator [TNK_358] (rows=59 width=115) keys:_col1,top n:100 - Group By Operator [GBY_354] (rows=59 width=115) + Group By Operator [GBY_357] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_375] + Reduce Output Operator [RS_378] PartitionCols:_col0 - Group By Operator [GBY_374] (rows=59 width=115) + Group By Operator [GBY_377] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_373] (rows=19 width=115) + Group By Operator [GBY_376] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_109] + SHUFFLE [RS_112] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=19 width=115) + Group By Operator [GBY_111] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=11364 width=3) - Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_308] (rows=11364 width=3) + Conds:RS_107._col0=RS_108._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_104] + SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_294] (rows=461514 width=7) - Conds:RS_320._col1=RS_326._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_297] (rows=461514 width=7) + Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_323] PartitionCols:_col1 - Select Operator [SEL_319] (rows=460848 width=7) + Select Operator [SEL_322] (rows=460848 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_318] (rows=460848 width=7) + Filter Operator [FIL_321] (rows=460848 width=7) predicate:(i_manufact_id is not null and i_item_sk is not null) TableScan [TS_0] (rows=462000 width=7) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + SHUFFLE [RS_329] PartitionCols:_col0 - Group By Operator [GBY_325] (rows=692 width=3) + Group By Operator [GBY_328] (rows=692 width=3) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + SHUFFLE [RS_327] PartitionCols:_col0 - Group By Operator [GBY_323] (rows=692 width=3) + Group By Operator [GBY_326] (rows=692 width=3) Output:["_col0"],keys:i_manufact_id - Select Operator [SEL_322] (rows=46085 width=93) + Select Operator [SEL_325] (rows=46085 width=93) Output:["i_manufact_id"] - Filter Operator [FIL_321] (rows=46085 width=93) + Filter Operator [FIL_324] (rows=46085 width=93) predicate:((i_category = 'Books') and i_manufact_id is not null) TableScan [TS_3] (rows=462000 width=93) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_manufact_id"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_108] PartitionCols:_col2 - Select Operator [SEL_100] (rows=788222 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_302] (rows=788222 width=110) - Conds:RS_97._col2=RS_350._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] - PartitionCols:_col0 - Select Operator [SEL_347] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_346] (rows=8000000 width=112) - predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_97] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_301] (rows=3941109 width=118) - Conds:RS_372._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - PartitionCols:_col0 - Select Operator [SEL_328] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_327] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_372] - PartitionCols:_col0 - Select Operator [SEL_371] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_370] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_85] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_369] - Group By Operator [GBY_368] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_334] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_305] (rows=788222 width=110) + Conds:RS_334._col0=RS_101._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_330] (rows=8000000 width=112) + predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) + TableScan [TS_10] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_304] (rows=3941109 width=118) + Conds:RS_375._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + PartitionCols:_col0 + Select Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_335] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) + TableScan [TS_16] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_375] + PartitionCols:_col0 + Select Operator [SEL_374] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_373] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_90] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_372] + Group By Operator [GBY_371] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_342] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_353] + Reduce Output Operator [RS_356] PartitionCols:_col0 - Group By Operator [GBY_352] (rows=59 width=115) + Group By Operator [GBY_355] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_351] (rows=64 width=115) + Group By Operator [GBY_354] (rows=64 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_34] + SHUFFLE [RS_35] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=64 width=115) + Group By Operator [GBY_34] (rows=64 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=41476 width=3) - Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_306] (rows=41476 width=3) + Conds:RS_30._col0=RS_31._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col2 - Select Operator [SEL_25] (rows=2876890 width=4) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_296] (rows=2876890 width=4) - Conds:RS_22._col2=RS_348._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_347] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_295] (rows=14384447 width=4) - Conds:RS_345._col0=RS_329._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_328] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] - PartitionCols:_col0 - Select Operator [SEL_344] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_343] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_342] - Group By Operator [GBY_341] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_335] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_330] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_299] (rows=2876890 width=4) + Conds:RS_332._col0=RS_24._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_298] (rows=14384447 width=4) + Conds:RS_353._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] + PartitionCols:_col0 + Select Operator [SEL_352] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_351] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_13] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_350] + Group By Operator [GBY_349] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_346] + Group By Operator [GBY_343] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_338] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_367] + Reduce Output Operator [RS_370] PartitionCols:_col0 - Group By Operator [GBY_366] (rows=59 width=115) + Group By Operator [GBY_369] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_365] (rows=35 width=115) + Group By Operator [GBY_368] (rows=35 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_71] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=35 width=115) + Group By Operator [GBY_72] (rows=35 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=22352 width=3) - Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_307] (rows=22352 width=3) + Conds:RS_68._col0=RS_69._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_66] + SHUFFLE [RS_68] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_69] PartitionCols:_col3 - Select Operator [SEL_62] (rows=1550375 width=13) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_299] (rows=1550375 width=13) - Conds:RS_59._col1=RS_349._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_347] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=7751872 width=98) - Conds:RS_364._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_328] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_364] - PartitionCols:_col0 - Select Operator [SEL_363] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_362] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_47] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_361] - Group By Operator [GBY_360] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Group By Operator [GBY_336] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_332] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_302] (rows=1550375 width=13) + Conds:RS_333._col0=RS_62._col1(Inner),Output:["_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_301] (rows=7751872 width=98) + Conds:RS_367._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_367] + PartitionCols:_col0 + Select Operator [SEL_366] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_365] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_51] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_364] + Group By Operator [GBY_363] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_340] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] diff --git a/ql/src/test/results/clientpositive/perf/tez/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/query38.q.out index 4b6c45a350..ddd32da318 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query38.q.out @@ -57,205 +57,205 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 19 <- Reducer 13 (BROADCAST_EDGE) -Map 20 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 13 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 17 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Map 19 <- Reducer 16 (BROADCAST_EDGE) +Map 20 <- Reducer 18 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 16 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 13 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) +Reducer 18 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_234] - Group By Operator [GBY_233] (rows=1 width=8) + Reducer 6 vectorized + File Output Operator [FS_237] + Group By Operator [GBY_236] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_232] - Group By Operator [GBY_231] (rows=1 width=8) + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_235] + Group By Operator [GBY_234] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_230] (rows=1 width=8) - Filter Operator [FIL_229] (rows=1 width=8) + Select Operator [SEL_233] (rows=1 width=8) + Filter Operator [FIL_232] (rows=1 width=8) predicate:(_col3 = 3L) - Select Operator [SEL_228] (rows=165330890 width=8) + Select Operator [SEL_231] (rows=165330890 width=8) Output:["_col3"] - Group By Operator [GBY_227] (rows=165330890 width=282) + Group By Operator [GBY_230] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 12 [CONTAINS] vectorized - Reduce Output Operator [RS_244] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] vectorized + Reduce Output Operator [RS_257] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_243] (rows=165330890 width=282) + Group By Operator [GBY_256] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_242] (rows=49146883 width=282) + Group By Operator [GBY_255] (rows=24986582 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_241] (rows=49146883 width=274) + Select Operator [SEL_254] (rows=24986582 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_240] (rows=49146883 width=274) + Group By Operator [GBY_253] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_42] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_71] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_41] (rows=49146883 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_177] (rows=49146883 width=274) - Conds:RS_37._col1=RS_220._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + Group By Operator [GBY_70] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_182] (rows=24986582 width=274) + Conds:RS_205._col0=RS_67._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] PartitionCols:_col0 - Select Operator [SEL_218] (rows=80000000 width=184) + Select Operator [SEL_202] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_217] (rows=80000000 width=184) + Filter Operator [FIL_201] (rows=80000000 width=184) predicate:c_customer_sk is not null - TableScan [TS_6] (rows=80000000 width=184) + TableScan [TS_0] (rows=80000000 width=184) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_37] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_176] (rows=49146883 width=97) - Conds:RS_239._col0=RS_202._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_202] + Merge Join Operator [MERGEJOIN_181] (rows=24986582 width=97) + Conds:RS_252._col0=RS_212._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_199] (rows=317 width=98) + Select Operator [SEL_207] (rows=317 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_198] (rows=317 width=102) + Filter Operator [FIL_206] (rows=317 width=102) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=102) + TableScan [TS_6] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_252] PartitionCols:_col0 - Select Operator [SEL_238] (rows=285117831 width=7) + Select Operator [SEL_251] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_237] (rows=285117831 width=7) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_25] (rows=287989836 width=7) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) + Filter Operator [FIL_250] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_63_date_dim_d_date_sk_min) AND DynamicValue(RS_63_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_63_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_56] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_249] + Group By Operator [GBY_248] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_210] - Group By Operator [GBY_207] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_219] + Group By Operator [GBY_216] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_203] (rows=317 width=4) + Select Operator [SEL_213] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_199] - <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_254] + Please refer to the previous Select Operator [SEL_207] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_229] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_253] (rows=165330890 width=282) + Group By Operator [GBY_228] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_252] (rows=24986582 width=282) + Group By Operator [GBY_227] (rows=91197425 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_251] (rows=24986582 width=274) + Select Operator [SEL_226] (rows=91197425 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_250] (rows=24986582 width=274) + Group By Operator [GBY_225] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_68] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_67] (rows=24986582 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_179] (rows=24986582 width=274) - Conds:RS_63._col1=RS_221._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + Group By Operator [GBY_17] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_178] (rows=91197425 width=274) + Conds:RS_203._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_218] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Please refer to the previous Select Operator [SEL_202] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=24986582 width=97) - Conds:RS_249._col0=RS_204._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_204] + Merge Join Operator [MERGEJOIN_177] (rows=91197425 width=96) + Conds:RS_224._col0=RS_208._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_208] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_199] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_249] + Please refer to the previous Select Operator [SEL_207] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_248] (rows=143930993 width=7) + Select Operator [SEL_223] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_247] (rows=143930993 width=7) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_51] (rows=144002668 width=7) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_246] - Group By Operator [GBY_245] (rows=1 width=12) + Filter Operator [FIL_222] (rows=525327388 width=7) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_221] + Group By Operator [GBY_220] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_211] - Group By Operator [GBY_208] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_217] + Group By Operator [GBY_214] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_205] (rows=317 width=4) + Select Operator [SEL_209] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_199] - <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_226] + Please refer to the previous Select Operator [SEL_207] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_247] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_225] (rows=165330890 width=282) + Group By Operator [GBY_246] (rows=165330890 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_224] (rows=91197425 width=282) + Group By Operator [GBY_245] (rows=49146883 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_223] (rows=91197425 width=274) + Select Operator [SEL_244] (rows=49146883 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_222] (rows=91197425 width=274) + Group By Operator [GBY_243] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=91197425 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_175] (rows=91197425 width=274) - Conds:RS_12._col1=RS_219._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + Group By Operator [GBY_43] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_180] (rows=49146883 width=274) + Conds:RS_204._col0=RS_40._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_218] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Please refer to the previous Select Operator [SEL_202] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_174] (rows=91197425 width=96) - Conds:RS_216._col0=RS_200._col0(Inner),Output:["_col1","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_179] (rows=49146883 width=97) + Conds:RS_242._col0=RS_210._col0(Inner),Output:["_col1","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_210] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_199] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + Please refer to the previous Select Operator [SEL_207] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] PartitionCols:_col0 - Select Operator [SEL_215] (rows=525327388 width=7) + Select Operator [SEL_241] (rows=285117831 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_214] (rows=525327388 width=7) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) + Filter Operator [FIL_240] (rows=285117831 width=7) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_36_date_dim_d_date_sk_min) AND DynamicValue(RS_36_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_36_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_29] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_209] - Group By Operator [GBY_206] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_218] + Group By Operator [GBY_215] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_201] (rows=317 width=4) + Select Operator [SEL_211] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_199] + Please refer to the previous Select Operator [SEL_207] diff --git a/ql/src/test/results/clientpositive/perf/tez/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/query39.q.out index 0681f0520b..3b7d4ccf59 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query39.q.out @@ -63,145 +63,145 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_232] - Select Operator [SEL_231] (rows=189509 width=56) + Reducer 6 vectorized + File Output Operator [FS_234] + Select Operator [SEL_233] (rows=189509 width=56) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_60] - Merge Join Operator [MERGEJOIN_202] (rows=189509 width=48) - Conds:RS_225._col0, _col1=RS_230._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_62] + Merge Join Operator [MERGEJOIN_204] (rows=189509 width=48) + Conds:RS_227._col0, _col1=RS_232._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] PartitionCols:_col0, _col1 - Select Operator [SEL_229] (rows=18049 width=24) + Select Operator [SEL_226] (rows=18049 width=24) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_228] (rows=18049 width=40) + Filter Operator [FIL_225] (rows=18049 width=40) predicate:CASE WHEN (((UDFToDouble(_col3) / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (UDFToDouble(_col3) / _col4)) > 1.0D)) END - Select Operator [SEL_227] (rows=36099 width=40) + Select Operator [SEL_224] (rows=36099 width=40) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_226] (rows=36099 width=140) + Group By Operator [GBY_223] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_52] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_51] (rows=36099 width=140) + Group By Operator [GBY_24] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 - Select Operator [SEL_49] (rows=1032514 width=108) + Select Operator [SEL_22] (rows=1032514 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_201] (rows=1032514 width=108) - Conds:RS_46._col2=RS_220._col0(Inner),Output:["_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_200] (rows=1032514 width=108) + Conds:RS_19._col3=RS_221._col0(Inner),Output:["_col0","_col4","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_218] (rows=27 width=104) + Select Operator [SEL_220] (rows=27 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_217] (rows=27 width=104) + Filter Operator [FIL_219] (rows=27 width=104) predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=104) + TableScan [TS_13] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_200] (rows=1032514 width=8) - Conds:RS_43._col1=RS_216._col0(Inner),Output:["_col2","_col3","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_199] (rows=1032514 width=8) + Conds:RS_207._col0=RS_17._col1(Inner),Output:["_col0","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_207] PartitionCols:_col0 - Select Operator [SEL_214] (rows=462000 width=4) + Select Operator [SEL_206] (rows=462000 width=4) Output:["_col0"] - Filter Operator [FIL_213] (rows=462000 width=4) + Filter Operator [FIL_205] (rows=462000 width=4) predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=4) + TableScan [TS_0] (rows=462000 width=4) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_43] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_199] (rows=1032514 width=8) - Conds:RS_206._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] + Merge Join Operator [MERGEJOIN_198] (rows=1032514 width=8) + Conds:RS_211._col0=RS_217._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_211] PartitionCols:_col0 - Select Operator [SEL_204] (rows=37584000 width=15) + Select Operator [SEL_210] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_203] (rows=37584000 width=15) + Filter Operator [FIL_209] (rows=37584000 width=15) predicate:(inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) - TableScan [TS_0] (rows=37584000 width=15) + TableScan [TS_3] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_210] (rows=50 width=4) + Select Operator [SEL_215] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_208] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 5) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + Filter Operator [FIL_213] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] PartitionCols:_col0, _col1 - Select Operator [SEL_224] (rows=18049 width=24) + Select Operator [SEL_231] (rows=18049 width=24) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_223] (rows=18049 width=40) + Filter Operator [FIL_230] (rows=18049 width=40) predicate:CASE WHEN (((UDFToDouble(_col3) / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (UDFToDouble(_col3) / _col4)) > 1.0D)) END - Select Operator [SEL_222] (rows=36099 width=40) + Select Operator [SEL_229] (rows=36099 width=40) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_221] (rows=36099 width=140) + Group By Operator [GBY_228] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_54] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_23] (rows=36099 width=140) + Group By Operator [GBY_53] (rows=36099 width=140) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 - Select Operator [SEL_21] (rows=1032514 width=108) + Select Operator [SEL_51] (rows=1032514 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_198] (rows=1032514 width=108) - Conds:RS_18._col2=RS_219._col0(Inner),Output:["_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_203] (rows=1032514 width=108) + Conds:RS_48._col3=RS_222._col0(Inner),Output:["_col0","_col4","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + SHUFFLE [RS_222] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_218] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_197] (rows=1032514 width=8) - Conds:RS_15._col1=RS_215._col0(Inner),Output:["_col2","_col3","_col5"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + Please refer to the previous Select Operator [SEL_220] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_202] (rows=1032514 width=8) + Conds:RS_208._col0=RS_46._col1(Inner),Output:["_col0","_col3","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_214] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] + Please refer to the previous Select Operator [SEL_206] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_196] (rows=1032514 width=8) - Conds:RS_205._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_205] + Merge Join Operator [MERGEJOIN_201] (rows=1032514 width=8) + Conds:RS_212._col0=RS_218._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_204] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_211] + Please refer to the previous Select Operator [SEL_210] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] PartitionCols:_col0 - Select Operator [SEL_209] (rows=50 width=4) + Select Operator [SEL_216] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_207] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Filter Operator [FIL_214] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 5) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] PREHOOK: query: with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out index 1b2f165c24..65962480ab 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -229,399 +229,401 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 34 (BROADCAST_EDGE) -Map 11 <- Reducer 35 (BROADCAST_EDGE) -Map 15 <- Reducer 36 (BROADCAST_EDGE) -Map 19 <- Reducer 37 (BROADCAST_EDGE) -Map 23 <- Reducer 33 (BROADCAST_EDGE) -Map 27 <- Reducer 32 (BROADCAST_EDGE) +Map 20 <- Reducer 23 (BROADCAST_EDGE) +Map 34 <- Reducer 25 (BROADCAST_EDGE) +Map 35 <- Reducer 27 (BROADCAST_EDGE) +Map 36 <- Reducer 29 (BROADCAST_EDGE) +Map 37 <- Reducer 31 (BROADCAST_EDGE) +Map 38 <- Reducer 33 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 13 <- Map 38 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 17 <- Map 38 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 21 <- Map 38 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 25 <- Map 38 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE) -Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 29 <- Map 38 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 38 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (SIMPLE_EDGE) -Reducer 32 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 31 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 26 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 30 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 14 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 1 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 1 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (SIMPLE_EDGE), Map 34 (SIMPLE_EDGE) +Reducer 25 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 22 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 27 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 22 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) +Reducer 29 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 22 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 31 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 22 (SIMPLE_EDGE), Map 38 (SIMPLE_EDGE) +Reducer 33 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 13 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized + Reducer 5 vectorized File Output Operator [FS_570] Limit [LIM_569] (rows=100 width=85) Number of rows:100 Select Operator [SEL_568] (rows=7323197 width=85) Output:["_col0"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_141] - Select Operator [SEL_140] (rows=7323197 width=85) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_149] + Select Operator [SEL_148] (rows=7323197 width=85) Output:["_col0"] - Top N Key Operator [TNK_256] (rows=7323197 width=537) - keys:_col13,top n:100 - Filter Operator [FIL_139] (rows=7323197 width=537) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE (false) END) ELSE (false) END + Top N Key Operator [TNK_264] (rows=7323197 width=537) + keys:_col1,top n:100 + Filter Operator [FIL_147] (rows=7323197 width=537) + predicate:CASE WHEN (_col8 is not null) THEN (CASE WHEN (_col14) THEN (((_col4 / _col13) > (_col2 / _col8))) ELSE (false) END) ELSE (false) END Merge Join Operator [MERGEJOIN_473] (rows=14646395 width=537) - Conds:RS_136._col2=RS_567._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col13","_col14"] - <-Reducer 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_567] + Conds:RS_521._col0=RS_145._col4(Inner),Output:["_col1","_col2","_col4","_col8","_col13","_col14"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_521] PartitionCols:_col0 - Select Operator [SEL_566] (rows=80000000 width=297) + Select Operator [SEL_520] (rows=80000000 width=297) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_565] (rows=80000000 width=764) + Group By Operator [GBY_519] (rows=80000000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_120] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_119] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_468] (rows=187573258 width=764) - Conds:RS_115._col1=RS_513._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_513] + Group By Operator [GBY_17] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_458] (rows=187573258 width=764) + Conds:RS_476._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_476] PartitionCols:_col0 - Select Operator [SEL_512] (rows=80000000 width=656) + Select Operator [SEL_475] (rows=80000000 width=656) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_511] (rows=80000000 width=656) + Filter Operator [FIL_474] (rows=80000000 width=656) predicate:(c_customer_sk is not null and c_customer_id is not null) - TableScan [TS_109] (rows=80000000 width=656) + TableScan [TS_0] (rows=80000000 width=656) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_115] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_467] (rows=187573258 width=115) - Conds:RS_564._col0=RS_482._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_482] + Merge Join Operator [MERGEJOIN_457] (rows=187573258 width=115) + Conds:RS_518._col0=RS_490._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_490] PartitionCols:_col0 - Select Operator [SEL_478] (rows=652 width=4) + Select Operator [SEL_486] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_474] (rows=652 width=8) + Filter Operator [FIL_482] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_106] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_564] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_518] PartitionCols:_col0 - Select Operator [SEL_563] (rows=525327388 width=119) + Select Operator [SEL_517] (rows=525327388 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_562] (rows=525327388 width=435) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_113_date_dim_d_date_sk_min) AND DynamicValue(RS_113_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_113_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_103] (rows=575995635 width=435) + Filter Operator [FIL_516] (rows=525327388 width=435) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=435) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_561] - Group By Operator [GBY_560] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_515] + Group By Operator [GBY_514] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_500] - Group By Operator [GBY_494] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_508] + Group By Operator [GBY_502] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_483] (rows=652 width=4) + Select Operator [SEL_491] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_478] + Please refer to the previous Select Operator [SEL_486] <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_136] - PartitionCols:_col2 - Filter Operator [FIL_135] (rows=12248093 width=668) - predicate:CASE WHEN (_col6) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col1 / _col5))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_472] (rows=24496186 width=668) - Conds:RS_132._col2=RS_559._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col11"] - <-Reducer 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_559] - PartitionCols:_col0 - Select Operator [SEL_558] (rows=80000000 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_557] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_99] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_466] (rows=101084444 width=764) - Conds:RS_95._col1=RS_514._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_514] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_512] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_465] (rows=101084444 width=115) - Conds:RS_556._col0=RS_484._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_484] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_478] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_556] - PartitionCols:_col0 - Select Operator [SEL_555] (rows=285117831 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_554] (rows=285117831 width=453) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_93_date_dim_d_date_sk_min) AND DynamicValue(RS_93_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_93_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_83] (rows=287989836 width=453) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_553] - Group By Operator [GBY_552] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_501] - Group By Operator [GBY_495] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_485] (rows=652 width=4) + SHUFFLE [RS_145] + PartitionCols:_col4 + Select Operator [SEL_143] (rows=12248093 width=668) + Output:["_col1","_col4","_col5","_col10","_col11"] + Filter Operator [FIL_142] (rows=12248093 width=668) + predicate:CASE WHEN (_col8) THEN (CASE WHEN (_col11) THEN (((_col1 / _col10) > (_col3 / _col7))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_472] (rows=24496186 width=668) + Conds:RS_529._col0=RS_140._col2(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col10","_col11"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_140] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_471] (rows=20485011 width=556) + Conds:RS_135._col2=RS_567._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_135] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_470] (rows=20485011 width=440) + Conds:RS_132._col2=RS_557._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_132] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_469] (rows=31888273 width=324) + Conds:RS_537._col0=RS_547._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_537] + PartitionCols:_col0 + Select Operator [SEL_536] (rows=51391963 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_535] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_59] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_462] (rows=51391963 width=764) + Conds:RS_478._col0=RS_56._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_478] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_475] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_461] (rows=51391963 width=115) + Conds:RS_534._col0=RS_494._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_494] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_486] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_534] + PartitionCols:_col0 + Select Operator [SEL_533] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_532] (rows=143930993 width=455) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_531] + Group By Operator [GBY_530] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_510] + Group By Operator [GBY_504] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_495] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_486] + <-Reducer 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_547] + PartitionCols:_col0 + Select Operator [SEL_546] (rows=26666666 width=212) + Output:["_col0","_col1"] + Filter Operator [FIL_545] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_544] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_543] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_81] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_80] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_464] (rows=187573258 width=764) + Conds:RS_479._col0=RS_77._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_479] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_475] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_463] (rows=187573258 width=115) + Conds:RS_542._col0=RS_496._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_496] + PartitionCols:_col0 + Select Operator [SEL_487] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_483] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_542] + PartitionCols:_col0 + Select Operator [SEL_541] (rows=525327388 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_540] (rows=525327388 width=435) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_66] (rows=575995635 width=435) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_539] + Group By Operator [GBY_538] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_511] + Group By Operator [GBY_505] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_497] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_487] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_557] + PartitionCols:_col0 + Select Operator [SEL_556] (rows=17130654 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_555] (rows=17130654 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_554] (rows=51391963 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_553] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_103] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_102] (rows=51391963 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_466] (rows=51391963 width=764) + Conds:RS_480._col0=RS_99._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_480] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_475] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_99] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_465] (rows=51391963 width=115) + Conds:RS_552._col0=RS_498._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_498] + PartitionCols:_col0 + Select Operator [SEL_488] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_478] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_132] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_471] (rows=20485011 width=556) - Conds:RS_129._col2=RS_551._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_551] - PartitionCols:_col0 - Select Operator [SEL_550] (rows=26666666 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_549] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_548] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_547] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_79] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_78] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_464] (rows=101084444 width=764) - Conds:RS_74._col1=RS_518._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_518] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_512] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_74] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_463] (rows=101084444 width=115) - Conds:RS_546._col0=RS_492._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_492] - PartitionCols:_col0 - Select Operator [SEL_481] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_477] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_106] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_546] - PartitionCols:_col0 - Select Operator [SEL_545] (rows=285117831 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_544] (rows=285117831 width=453) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_62] (rows=287989836 width=453) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_543] - Group By Operator [GBY_542] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_505] - Group By Operator [GBY_499] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_493] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_481] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_129] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_470] (rows=20485011 width=440) - Conds:RS_126._col2=RS_541._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_541] - PartitionCols:_col0 - Select Operator [SEL_540] (rows=17130654 width=216) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_539] (rows=17130654 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_538] (rows=51391963 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_537] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_57] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_462] (rows=51391963 width=764) - Conds:RS_53._col1=RS_517._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_517] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_512] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_461] (rows=51391963 width=115) - Conds:RS_536._col0=RS_490._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_490] - PartitionCols:_col0 - Select Operator [SEL_480] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_476] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_106] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_536] - PartitionCols:_col0 - Select Operator [SEL_535] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_534] (rows=143930993 width=455) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_41] (rows=144002668 width=455) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_533] - Group By Operator [GBY_532] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_504] - Group By Operator [GBY_498] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_491] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_480] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_126] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_469] (rows=31888273 width=324) - Conds:RS_521._col0=RS_531._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_531] - PartitionCols:_col0 - Select Operator [SEL_530] (rows=26666666 width=212) - Output:["_col0","_col1"] - Filter Operator [FIL_529] (rows=26666666 width=212) - predicate:(_col7 > 0) - Select Operator [SEL_528] (rows=80000000 width=212) - Output:["_col0","_col7"] - Group By Operator [GBY_527] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_36] (rows=80000000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_460] (rows=187573258 width=764) - Conds:RS_32._col1=RS_516._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_516] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_512] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_459] (rows=187573258 width=115) - Conds:RS_526._col0=RS_488._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_488] - PartitionCols:_col0 - Select Operator [SEL_479] (rows=652 width=4) + Filter Operator [FIL_484] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_552] + PartitionCols:_col0 + Select Operator [SEL_551] (rows=143930993 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_550] (rows=143930993 width=455) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_88] (rows=144002668 width=455) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_549] + Group By Operator [GBY_548] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_512] + Group By Operator [GBY_506] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_499] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_488] + <-Reducer 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_567] + PartitionCols:_col0 + Select Operator [SEL_566] (rows=26666666 width=216) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_565] (rows=26666666 width=212) + predicate:(_col7 > 0) + Select Operator [SEL_564] (rows=80000000 width=212) + Output:["_col0","_col7"] + Group By Operator [GBY_563] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_125] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_124] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_468] (rows=101084444 width=764) + Conds:RS_481._col0=RS_121._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_481] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_475] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_121] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_467] (rows=101084444 width=115) + Conds:RS_562._col0=RS_500._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_500] + PartitionCols:_col0 + Select Operator [SEL_489] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_485] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_562] + PartitionCols:_col0 + Select Operator [SEL_561] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_560] (rows=285117831 width=453) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_117_date_dim_d_date_sk_min) AND DynamicValue(RS_117_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_117_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_110] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_559] + Group By Operator [GBY_558] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_513] + Group By Operator [GBY_507] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_501] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_489] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_529] + PartitionCols:_col0 + Select Operator [SEL_528] (rows=80000000 width=212) + Output:["_col0","_col1"] + Group By Operator [GBY_527] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 + Group By Operator [GBY_38] (rows=80000000 width=764) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col10)"],keys:_col1, _col2, _col3, _col4, _col5, _col6, _col7 + Merge Join Operator [MERGEJOIN_460] (rows=101084444 width=764) + Conds:RS_477._col0=RS_35._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_477] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_475] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_459] (rows=101084444 width=115) + Conds:RS_526._col0=RS_492._col0(Inner),Output:["_col1","_col2"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_492] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_486] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_526] + PartitionCols:_col0 + Select Operator [SEL_525] (rows=285117831 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_524] (rows=285117831 width=453) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=287989836 width=453) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_523] + Group By Operator [GBY_522] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_509] + Group By Operator [GBY_503] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_493] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_475] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_106] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_526] - PartitionCols:_col0 - Select Operator [SEL_525] (rows=525327388 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_524] (rows=525327388 width=435) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=435) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"] - <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_523] - Group By Operator [GBY_522] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_503] - Group By Operator [GBY_497] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_489] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_479] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_521] - PartitionCols:_col0 - Select Operator [SEL_520] (rows=51391963 width=212) - Output:["_col0","_col1"] - Group By Operator [GBY_519] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Group By Operator [GBY_16] (rows=51391963 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col8, _col9, _col10, _col11 - Merge Join Operator [MERGEJOIN_458] (rows=51391963 width=764) - Conds:RS_12._col1=RS_515._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_515] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_512] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_457] (rows=51391963 width=115) - Conds:RS_510._col0=RS_486._col0(Inner),Output:["_col1","_col2"] - <-Map 31 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_486] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_478] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_510] - PartitionCols:_col0 - Select Operator [SEL_509] (rows=143930993 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_508] (rows=143930993 width=455) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=455) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"] - <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_507] - Group By Operator [GBY_506] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_502] - Group By Operator [GBY_496] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_487] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_478] + Please refer to the previous Select Operator [SEL_486] diff --git a/ql/src/test/results/clientpositive/perf/tez/query40.q.out b/ql/src/test/results/clientpositive/perf/tez/query40.q.out index afdf14246b..ce8ebe23a4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -67,109 +67,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Map 5 <- Reducer 12 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_131] - Limit [LIM_130] (rows=100 width=410) + Reducer 4 vectorized + File Output Operator [FS_132] + Limit [LIM_131] (rows=100 width=410) Number of rows:100 - Select Operator [SEL_129] (rows=769995 width=410) + Select Operator [SEL_130] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - Group By Operator [GBY_127] (rows=769995 width=410) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + Group By Operator [GBY_128] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col0, _col1 - Group By Operator [GBY_29] (rows=5757278 width=410) + Group By Operator [GBY_30] (rows=5757278 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Select Operator [SEL_27] (rows=5757278 width=278) + Select Operator [SEL_28] (rows=5757278 width=278) Output:["_col0","_col1","_col2","_col3"] - Top N Key Operator [TNK_58] (rows=5757278 width=278) - keys:_col14, _col12,top n:100 - Merge Join Operator [MERGEJOIN_106] (rows=5757278 width=278) - Conds:RS_24._col1=RS_126._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + Top N Key Operator [TNK_59] (rows=5757278 width=278) + keys:_col1, _col14,top n:100 + Merge Join Operator [MERGEJOIN_107] (rows=5757278 width=278) + Conds:RS_110._col0=RS_26._col1(Inner),Output:["_col1","_col6","_col9","_col11","_col12","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] PartitionCols:_col0 - Select Operator [SEL_125] (rows=27 width=90) + Select Operator [SEL_109] (rows=27 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_124] (rows=27 width=90) + Filter Operator [FIL_108] (rows=27 width=90) predicate:w_warehouse_sk is not null - TableScan [TS_12] (rows=27 width=90) + TableScan [TS_0] (rows=27 width=90) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_105] (rows=5757278 width=195) - Conds:RS_21._col2=RS_109._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + Merge Join Operator [MERGEJOIN_106] (rows=5757278 width=195) + Conds:RS_21._col2=RS_113._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] PartitionCols:_col0 - Select Operator [SEL_108] (rows=51333 width=104) + Select Operator [SEL_112] (rows=51333 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_107] (rows=51333 width=215) + Filter Operator [FIL_111] (rows=51333 width=215) predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) - TableScan [TS_9] (rows=462000 width=215) + TableScan [TS_12] (rows=462000 width=215) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_104] (rows=51815831 width=124) - Conds:RS_18._col0=RS_123._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + Merge Join Operator [MERGEJOIN_105] (rows=51815831 width=124) + Conds:RS_18._col0=RS_127._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_127] PartitionCols:_col0 - Select Operator [SEL_122] (rows=8116 width=12) + Select Operator [SEL_126] (rows=8116 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_121] (rows=8116 width=98) + Filter Operator [FIL_125] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=98) + TableScan [TS_9] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_103] (rows=466374405 width=167) - Conds:RS_117._col2, _col3=RS_120._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + Merge Join Operator [MERGEJOIN_104] (rows=466374405 width=167) + Conds:RS_121._col2, _col3=RS_124._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] PartitionCols:_col2, _col3 - Select Operator [SEL_116] (rows=285115816 width=127) + Select Operator [SEL_120] (rows=285115816 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_115] (rows=285115816 width=127) + Filter Operator [FIL_119] (rows=285115816 width=127) predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=127) + TableScan [TS_3] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=51333 width=4) + Select Operator [SEL_114] (rows=51333 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_108] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + Please refer to the previous Select Operator [SEL_112] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] PartitionCols:_col0, _col1 - Select Operator [SEL_119] (rows=28798881 width=117) + Select Operator [SEL_123] (rows=28798881 width=117) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_118] (rows=28798881 width=117) + Filter Operator [FIL_122] (rows=28798881 width=117) predicate:(cr_order_number is not null and cr_item_sk is not null) - TableScan [TS_3] (rows=28798881 width=117) + TableScan [TS_6] (rows=28798881 width=117) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query46.q.out b/ql/src/test/results/clientpositive/perf/tez/query46.q.out index adf780d747..d049cf4d5a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query46.q.out @@ -99,122 +99,122 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=100 width=594) + File Output Operator [FS_177] + Limit [LIM_176] (rows=100 width=594) Number of rows:100 - Select Operator [SEL_174] (rows=8380115 width=594) + Select Operator [SEL_175] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_46] - Select Operator [SEL_45] (rows=8380115 width=594) + SHUFFLE [RS_47] + Select Operator [SEL_46] (rows=8380115 width=594) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_80] (rows=8380115 width=594) + Top N Key Operator [TNK_81] (rows=8380115 width=594) keys:_col3, _col2, _col5, _col8, _col6,top n:100 - Filter Operator [FIL_44] (rows=8380115 width=594) + Filter Operator [FIL_45] (rows=8380115 width=594) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_146] (rows=8380115 width=594) - Conds:RS_41._col0=RS_173._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_147] (rows=8380115 width=594) + Conds:RS_42._col0=RS_174._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_41] + SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_141] (rows=80000000 width=277) - Conds:RS_149._col1=RS_152._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_142] (rows=80000000 width=277) + Conds:RS_150._col1=RS_153._col0(Inner),Output:["_col0","_col2","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_151] (rows=40000000 width=97) + Select Operator [SEL_152] (rows=40000000 width=97) Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=40000000 width=97) + Filter Operator [FIL_151] (rows=40000000 width=97) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=97) default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col1 - Select Operator [SEL_148] (rows=80000000 width=188) + Select Operator [SEL_149] (rows=80000000 width=188) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_147] (rows=80000000 width=188) + Filter Operator [FIL_148] (rows=80000000 width=188) predicate:(c_customer_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=188) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] + SHUFFLE [RS_174] PartitionCols:_col1 - Select Operator [SEL_172] (rows=8380115 width=321) + Select Operator [SEL_173] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_171] (rows=8380115 width=321) + Group By Operator [GBY_172] (rows=8380115 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_35] + SHUFFLE [RS_36] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_34] (rows=8380115 width=321) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","sum(_col7)"],keys:_col1, _col12, _col3, _col5 - Merge Join Operator [MERGEJOIN_145] (rows=8380115 width=97) - Conds:RS_30._col3=RS_153._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col12"] + Group By Operator [GBY_35] (rows=8380115 width=321) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)","sum(_col9)"],keys:_col3, _col1, _col5, _col7 + Merge Join Operator [MERGEJOIN_146] (rows=8380115 width=97) + Conds:RS_154._col0=RS_32._col3(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col9"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_154] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_151] + Please refer to the previous Select Operator [SEL_152] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_30] + SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_144] (rows=8380115 width=4) - Conds:RS_27._col2=RS_170._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_145] (rows=8380115 width=4) + Conds:RS_27._col2=RS_171._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + SHUFFLE [RS_171] PartitionCols:_col0 - Select Operator [SEL_169] (rows=1855 width=4) + Select Operator [SEL_170] (rows=1855 width=4) Output:["_col0"] - Filter Operator [FIL_168] (rows=1855 width=12) + Filter Operator [FIL_169] (rows=1855 width=12) predicate:(((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=12) + TableScan [TS_18] (rows=7200 width=12) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_143] (rows=32526589 width=90) - Conds:RS_24._col4=RS_167._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_144] (rows=32526589 width=90) + Conds:RS_24._col4=RS_168._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_166] (rows=35 width=4) + Select Operator [SEL_167] (rows=35 width=4) Output:["_col0"] - Filter Operator [FIL_165] (rows=35 width=97) + Filter Operator [FIL_166] (rows=35 width=97) predicate:((s_city) IN ('Cedar Grove', 'Wildwood', 'Union', 'Salem', 'Highland Park') and s_store_sk is not null) - TableScan [TS_12] (rows=1704 width=97) + TableScan [TS_15] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_142] (rows=196204013 width=218) - Conds:RS_164._col0=RS_156._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_143] (rows=196204013 width=218) + Conds:RS_165._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_155] (rows=783 width=4) + Select Operator [SEL_156] (rows=783 width=4) Output:["_col0"] - Filter Operator [FIL_154] (rows=783 width=12) + Filter Operator [FIL_155] (rows=783 width=12) predicate:((d_year) IN (1998, 1999, 2000) and (d_dow) IN (6, 0) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=12) + TableScan [TS_12] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dow"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_163] (rows=457565061 width=237) + Select Operator [SEL_164] (rows=457565061 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_162] (rows=457565061 width=237) + Filter Operator [FIL_163] (rows=457565061 width=237) predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=237) + TableScan [TS_9] (rows=575995635 width=237) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_coupon_amt","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) + BROADCAST [RS_162] + Group By Operator [GBY_161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) + SHUFFLE [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_157] (rows=783 width=4) + Select Operator [SEL_158] (rows=783 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_155] + Please refer to the previous Select Operator [SEL_156] diff --git a/ql/src/test/results/clientpositive/perf/tez/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/query47.q.out index fc7aa1b31b..60bdbec25b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query47.q.out @@ -111,158 +111,158 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Reducer 5 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 10 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Reducer 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_322] - Limit [LIM_321] (rows=100 width=658) + Reducer 7 vectorized + File Output Operator [FS_326] + Limit [LIM_325] (rows=100 width=658) Number of rows:100 - Select Operator [SEL_320] (rows=301730 width=658) + Select Operator [SEL_324] (rows=301730 width=658) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_110] - Select Operator [SEL_109] (rows=301730 width=658) + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=301730 width=658) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_178] (rows=301730 width=546) - keys:(_col12 - _col13), _col11,top n:100 - Merge Join Operator [MERGEJOIN_279] (rows=301730 width=546) - Conds:RS_106._col6, _col7, _col8, _col9, _col14=RS_307._col0, _col1, _col2, _col3, _col5(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Top N Key Operator [TNK_182] (rows=301730 width=546) + keys:(_col18 - _col19), _col17,top n:100 + Merge Join Operator [MERGEJOIN_283] (rows=301730 width=546) + Conds:RS_311._col0, _col1, _col2, _col3, _col5=RS_111._col6, _col7, _col8, _col9, _col14(Inner),Output:["_col4","_col10","_col12","_col16","_col17","_col18","_col19"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] PartitionCols:_col0, _col1, _col2, _col3, _col5 - Select Operator [SEL_305] (rows=1810380 width=485) + Select Operator [SEL_309] (rows=1810380 width=485) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_303] (rows=1810380 width=489) + Filter Operator [FIL_307] (rows=1810380 width=489) predicate:rank_window_0 is not null - PTF Operator [PTF_301] (rows=1810380 width=489) + PTF Operator [PTF_305] (rows=1810380 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] - Select Operator [SEL_300] (rows=1810380 width=489) + Select Operator [SEL_304] (rows=1810380 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] PartitionCols:_col1, _col0, _col4, _col5 - Group By Operator [GBY_297] (rows=1810380 width=489) + Group By Operator [GBY_301] (rows=1810380 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_92] (rows=162257387 width=489) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col5, _col6, _col11, _col12 - Merge Join Operator [MERGEJOIN_277] (rows=162257387 width=472) - Conds:RS_88._col2=RS_296._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col11","_col12"] + Group By Operator [GBY_23] (rows=162257387 width=489) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)"],keys:_col1, _col2, _col8, _col9, _col11, _col12 + Merge Join Operator [MERGEJOIN_275] (rows=162257387 width=472) + Conds:RS_19._col5=RS_300._col0(Inner),Output:["_col1","_col2","_col6","_col8","_col9","_col11","_col12"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_296] + SHUFFLE [RS_300] PartitionCols:_col0 - Select Operator [SEL_295] (rows=1704 width=183) + Select Operator [SEL_299] (rows=1704 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_294] (rows=1704 width=183) + Filter Operator [FIL_298] (rows=1704 width=183) predicate:(s_store_sk is not null and s_store_name is not null and s_company_name is not null) - TableScan [TS_79] (rows=1704 width=183) + TableScan [TS_13] (rows=1704 width=183) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_276] (rows=162257387 width=297) - Conds:RS_85._col1=RS_293._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_274] (rows=162257387 width=297) + Conds:RS_286._col0=RS_17._col1(Inner),Output:["_col1","_col2","_col5","_col6","_col8","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_292] (rows=462000 width=194) + Select Operator [SEL_285] (rows=462000 width=194) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_291] (rows=462000 width=194) + Filter Operator [FIL_284] (rows=462000 width=194) predicate:(i_item_sk is not null and i_category is not null and i_brand is not null) - TableScan [TS_76] (rows=462000 width=194) + TableScan [TS_0] (rows=462000 width=194) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_17] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_275] (rows=162257387 width=111) - Conds:RS_290._col0=RS_282._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] + Merge Join Operator [MERGEJOIN_273] (rows=162257387 width=111) + Conds:RS_297._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_281] (rows=564 width=12) + Select Operator [SEL_288] (rows=564 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=564 width=12) + Filter Operator [FIL_287] (rows=564 width=12) predicate:((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_289] (rows=525329897 width=118) + Select Operator [SEL_296] (rows=525329897 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_288] (rows=525329897 width=118) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_70] (rows=575995635 width=118) + Filter Operator [FIL_295] (rows=525329897 width=118) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_294] + Group By Operator [GBY_293] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] + Group By Operator [GBY_291] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=564 width=4) + Select Operator [SEL_290] (rows=564 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_281] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] + Please refer to the previous Select Operator [SEL_288] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_111] PartitionCols:_col6, _col7, _col8, _col9, _col14 - Merge Join Operator [MERGEJOIN_278] (rows=301730 width=717) - Conds:RS_308._col0, _col1, _col2, _col3, _col5=RS_319._col0, _col1, _col2, _col3, _col8(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] + Merge Join Operator [MERGEJOIN_282] (rows=301730 width=717) + Conds:RS_312._col0, _col1, _col2, _col3, _col5=RS_323._col0, _col1, _col2, _col3, _col8(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] PartitionCols:_col0, _col1, _col2, _col3, _col5 - Select Operator [SEL_306] (rows=1810380 width=485) + Select Operator [SEL_310] (rows=1810380 width=485) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_304] (rows=1810380 width=489) + Filter Operator [FIL_308] (rows=1810380 width=489) predicate:rank_window_0 is not null - PTF Operator [PTF_302] (rows=1810380 width=489) + PTF Operator [PTF_306] (rows=1810380 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4, _col5"}] - Please refer to the previous Select Operator [SEL_300] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + Please refer to the previous Select Operator [SEL_304] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0, _col1, _col2, _col3, _col8 - Select Operator [SEL_318] (rows=301730 width=605) + Select Operator [SEL_322] (rows=301730 width=605) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_317] (rows=301730 width=605) + Filter Operator [FIL_321] (rows=301730 width=605) predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (false) END - Select Operator [SEL_316] (rows=603460 width=601) + Select Operator [SEL_320] (rows=603460 width=601) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_315] (rows=603460 width=601) + Filter Operator [FIL_319] (rows=603460 width=601) predicate:((_col0 > 0) and rank_window_1 is not null and (_col3 = 2000)) - PTF Operator [PTF_314] (rows=1810380 width=601) + PTF Operator [PTF_318] (rows=1810380 width=601) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5, _col6"}] - Select Operator [SEL_313] (rows=1810380 width=601) + Select Operator [SEL_317] (rows=1810380 width=601) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_316] PartitionCols:_col1, _col0, _col4, _col5 - Select Operator [SEL_311] (rows=1810380 width=489) + Select Operator [SEL_315] (rows=1810380 width=489) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_310] (rows=1810380 width=489) + PTF Operator [PTF_314] (rows=1810380 width=489) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col5, _col2"}] - Select Operator [SEL_309] (rows=1810380 width=489) + Select Operator [SEL_313] (rows=1810380 width=489) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_303] PartitionCols:_col1, _col0, _col4, _col5, _col2 - Please refer to the previous Group By Operator [GBY_297] + Please refer to the previous Group By Operator [GBY_301] diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index c232f1713a..8c56fda344 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,99 +143,101 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Reducer 11 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_118] - Group By Operator [GBY_117] (rows=1 width=8) + Reducer 3 vectorized + File Output Operator [FS_121] + Group By Operator [GBY_120] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Group By Operator [GBY_29] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col4)"] - Merge Join Operator [MERGEJOIN_96] (rows=170127 width=0) - Conds:RS_25._col3=RS_116._col0(Inner),Output:["_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_33] + Group By Operator [GBY_32] (rows=1 width=8) + Output:["_col0"],aggregations:["sum(_col10)"] + Merge Join Operator [MERGEJOIN_99] (rows=170127 width=0) + Conds:RS_102._col0=RS_29._col8(Inner),Output:["_col10"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_115] (rows=1704 width=4) + Select Operator [SEL_101] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_114] (rows=1704 width=4) + Filter Operator [FIL_100] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_12] (rows=1704 width=4) + TableScan [TS_0] (rows=1704 width=4) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col3 - Filter Operator [FIL_24] (rows=170127 width=24) - predicate:((_col11 and _col5) or (_col12 and _col6) or (_col13 and _col7)) - Merge Join Operator [MERGEJOIN_95] (rows=226838 width=24) - Conds:RS_21._col2=RS_113._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] - PartitionCols:_col0 - Select Operator [SEL_112] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_111] (rows=3529412 width=187) - predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_9] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=2570826 width=12) - Conds:RS_18._col1=RS_110._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - PartitionCols:_col0 - Select Operator [SEL_109] (rows=29552 width=4) - Output:["_col0"] - Filter Operator [FIL_108] (rows=29552 width=183) - predicate:((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) - TableScan [TS_6] (rows=1861800 width=183) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=57024544 width=25) - Conds:RS_107._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_99] - PartitionCols:_col0 - Select Operator [SEL_98] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_97] (rows=652 width=8) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] - PartitionCols:_col0 - Select Operator [SEL_106] (rows=159705894 width=31) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_105] (rows=159705894 width=233) - predicate:((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=233) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] - Group By Operator [GBY_101] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_100] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_98] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col8 + Select Operator [SEL_27] (rows=170127 width=24) + Output:["_col8","_col9"] + Filter Operator [FIL_26] (rows=170127 width=24) + predicate:((_col1 and _col10) or (_col2 and _col11) or (_col3 and _col12)) + Merge Join Operator [MERGEJOIN_98] (rows=226838 width=24) + Conds:RS_105._col0=RS_24._col3(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10","_col11","_col12"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_105] + PartitionCols:_col0 + Select Operator [SEL_104] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_103] (rows=3529412 width=187) + predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_97] (rows=2570826 width=12) + Conds:RS_108._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] + PartitionCols:_col0 + Select Operator [SEL_107] (rows=29552 width=4) + Output:["_col0"] + Filter Operator [FIL_106] (rows=29552 width=183) + predicate:((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) + TableScan [TS_6] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_96] (rows=57024544 width=25) + Conds:RS_119._col0=RS_111._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] + PartitionCols:_col0 + Select Operator [SEL_110] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_109] (rows=652 width=8) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] + PartitionCols:_col0 + Select Operator [SEL_118] (rows=159705894 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_117] (rows=159705894 width=233) + predicate:((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_addr_sk is not null and ss_store_sk is not null and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or (ss_sales_price >= 50) or (ss_sales_price <= 100) or (ss_sales_price >= 150) or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or (ss_net_profit >= 150) or (ss_net_profit <= 3000) or (ss_net_profit >= 50) or (ss_net_profit <= 25000)) and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_112] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_110] diff --git a/ql/src/test/results/clientpositive/perf/tez/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/query49.q.out index 8a51b515d3..60a0bc4a69 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query49.q.out @@ -269,281 +269,281 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 27 <- Reducer 19 (BROADCAST_EDGE) -Map 29 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Union 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 15 <- Map 28 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Map 28 <- Reducer 20 (BROADCAST_EDGE) +Map 30 <- Reducer 26 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 16 <- Map 27 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 20 <- Map 12 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) -Reducer 21 <- Map 30 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 20 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 13 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 22 <- Map 29 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Union 9 (CONTAINS) -Reducer 25 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 26 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 11 vectorized - File Output Operator [FS_312] - Limit [LIM_311] (rows=100 width=215) + Reducer 10 vectorized + File Output Operator [FS_315] + Limit [LIM_314] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_310] (rows=3418 width=215) + Select Operator [SEL_313] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - Select Operator [SEL_308] (rows=3418 width=215) + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] + Select Operator [SEL_311] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_307] (rows=3418 width=215) + Group By Operator [GBY_310] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 9 [SIMPLE_EDGE] - <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_353] + <-Union 8 [SIMPLE_EDGE] + <-Reducer 25 [CONTAINS] vectorized + Reduce Output Operator [RS_356] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_352] (rows=3418 width=215) + Group By Operator [GBY_355] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_351] (rows=3418 width=214) + Top N Key Operator [TNK_354] (rows=3418 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_350] (rows=1142 width=213) + Select Operator [SEL_353] (rows=1142 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_349] (rows=1142 width=248) + Filter Operator [FIL_352] (rows=1142 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_348] (rows=1714 width=248) + PTF Operator [PTF_351] (rows=1714 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_347] (rows=1714 width=248) + Select Operator [SEL_350] (rows=1714 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] + <-Reducer 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_349] PartitionCols:0 - Select Operator [SEL_345] (rows=1714 width=244) + Select Operator [SEL_348] (rows=1714 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_344] (rows=1714 width=244) + PTF Operator [PTF_347] (rows=1714 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_343] (rows=1714 width=244) + Select Operator [SEL_346] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] + <-Reducer 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_345] PartitionCols:0 - Group By Operator [GBY_341] (rows=1714 width=244) + Group By Operator [GBY_344] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_86] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_89] PartitionCols:_col0 - Group By Operator [GBY_85] (rows=3428 width=244) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_239] (rows=1673571 width=236) - Conds:RS_81._col1, _col2=RS_340._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + Group By Operator [GBY_88] (rows=3428 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col7)","sum(_col3)","sum(_col8)"],keys:_col5 + Merge Join Operator [MERGEJOIN_242] (rows=1673571 width=236) + Conds:RS_343._col0, _col1=RS_85._col1, _col2(Inner),Output:["_col2","_col3","_col5","_col7","_col8"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] PartitionCols:_col0, _col1 - Select Operator [SEL_339] (rows=19197050 width=124) + Select Operator [SEL_342] (rows=19197050 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_338] (rows=19197050 width=119) + Filter Operator [FIL_341] (rows=19197050 width=119) predicate:((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) - TableScan [TS_75] (rows=57591150 width=119) + TableScan [TS_71] (rows=57591150 width=119) default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_81] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_85] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_238] (rows=1673571 width=124) - Conds:RS_337._col0=RS_274._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_274] + Merge Join Operator [MERGEJOIN_241] (rows=1673571 width=124) + Conds:RS_340._col0=RS_280._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_280] PartitionCols:_col0 - Select Operator [SEL_269] (rows=50 width=4) + Select Operator [SEL_275] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_268] (rows=50 width=12) + Filter Operator [FIL_274] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 12) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_340] PartitionCols:_col0 - Select Operator [SEL_336] (rows=61119617 width=127) + Select Operator [SEL_339] (rows=61119617 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_335] (rows=61119617 width=229) - predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_79_date_dim_d_date_sk_min) AND DynamicValue(RS_79_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_79_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_69] (rows=575995635 width=229) + Filter Operator [FIL_338] (rows=61119617 width=229) + predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_74] (rows=575995635 width=229) default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_334] - Group By Operator [GBY_333] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_337] + Group By Operator [GBY_336] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_281] - Group By Operator [GBY_278] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_287] + Group By Operator [GBY_284] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_275] (rows=50 width=4) + Select Operator [SEL_281] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_269] - <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_306] + Please refer to the previous Select Operator [SEL_275] + <-Reducer 7 [CONTAINS] vectorized + Reduce Output Operator [RS_309] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_305] (rows=3418 width=215) + Group By Operator [GBY_308] (rows=3418 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_304] (rows=3418 width=214) + Top N Key Operator [TNK_307] (rows=3418 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_303] (rows=2276 width=215) + Select Operator [SEL_306] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_302] (rows=2276 width=215) + Group By Operator [GBY_305] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_332] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 19 [CONTAINS] vectorized + Reduce Output Operator [RS_335] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_331] (rows=2276 width=215) + Group By Operator [GBY_334] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_330] (rows=1134 width=215) + Select Operator [SEL_333] (rows=1134 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_329] (rows=1134 width=248) + Filter Operator [FIL_332] (rows=1134 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_328] (rows=1701 width=248) + PTF Operator [PTF_331] (rows=1701 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_327] (rows=1701 width=248) + Select Operator [SEL_330] (rows=1701 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] PartitionCols:0 - Select Operator [SEL_325] (rows=1701 width=244) + Select Operator [SEL_328] (rows=1701 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_324] (rows=1701 width=244) + PTF Operator [PTF_327] (rows=1701 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_323] (rows=1701 width=244) + Select Operator [SEL_326] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + <-Reducer 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_325] PartitionCols:0 - Group By Operator [GBY_321] (rows=1701 width=244) + Group By Operator [GBY_324] (rows=1701 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_48] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_50] PartitionCols:_col0 - Group By Operator [GBY_47] (rows=1701 width=244) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_237] (rows=865646 width=236) - Conds:RS_43._col1, _col2=RS_320._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Group By Operator [GBY_49] (rows=1701 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col7)","sum(_col3)","sum(_col8)"],keys:_col5 + Merge Join Operator [MERGEJOIN_240] (rows=865646 width=236) + Conds:RS_323._col0, _col1=RS_46._col1, _col2(Inner),Output:["_col2","_col3","_col5","_col7","_col8"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0, _col1 - Select Operator [SEL_319] (rows=9599627 width=124) + Select Operator [SEL_322] (rows=9599627 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_318] (rows=9599627 width=121) + Filter Operator [FIL_321] (rows=9599627 width=121) predicate:((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) - TableScan [TS_37] (rows=28798881 width=121) + TableScan [TS_32] (rows=28798881 width=121) default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_43] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_236] (rows=865646 width=124) - Conds:RS_317._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_272] + Merge Join Operator [MERGEJOIN_239] (rows=865646 width=124) + Conds:RS_320._col0=RS_278._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_278] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_269] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + Please refer to the previous Select Operator [SEL_275] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] PartitionCols:_col0 - Select Operator [SEL_316] (rows=31838858 width=127) + Select Operator [SEL_319] (rows=31838858 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_315] (rows=31838858 width=239) - predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_order_number is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_41_date_dim_d_date_sk_min) AND DynamicValue(RS_41_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_41_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_31] (rows=287989836 width=239) + Filter Operator [FIL_318] (rows=31838858 width=239) + predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_order_number is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_35] (rows=287989836 width=239) default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_314] - Group By Operator [GBY_313] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_280] - Group By Operator [GBY_277] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_286] + Group By Operator [GBY_283] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_273] (rows=50 width=4) + Select Operator [SEL_279] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_269] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_301] + Please refer to the previous Select Operator [SEL_275] + <-Reducer 5 [CONTAINS] vectorized + Reduce Output Operator [RS_304] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_300] (rows=2276 width=215) + Group By Operator [GBY_303] (rows=2276 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_299] (rows=1142 width=211) + Select Operator [SEL_302] (rows=1142 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_298] (rows=1142 width=248) + Filter Operator [FIL_301] (rows=1142 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_297] (rows=1714 width=248) + PTF Operator [PTF_300] (rows=1714 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_296] (rows=1714 width=248) + Select Operator [SEL_299] (rows=1714 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] PartitionCols:0 - Select Operator [SEL_294] (rows=1714 width=244) + Select Operator [SEL_297] (rows=1714 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_293] (rows=1714 width=244) + PTF Operator [PTF_296] (rows=1714 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_292] (rows=1714 width=244) + Select Operator [SEL_295] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_291] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] PartitionCols:0 - Group By Operator [GBY_290] (rows=1714 width=244) + Group By Operator [GBY_293] (rows=1714 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1714 width=244) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_235] (rows=438010 width=236) - Conds:RS_12._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + Group By Operator [GBY_17] (rows=1714 width=244) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col7)","sum(_col3)","sum(_col8)"],keys:_col5 + Merge Join Operator [MERGEJOIN_238] (rows=438010 width=236) + Conds:RS_273._col0, _col1=RS_14._col1, _col2(Inner),Output:["_col2","_col3","_col5","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_273] PartitionCols:_col0, _col1 - Select Operator [SEL_288] (rows=4799489 width=124) + Select Operator [SEL_272] (rows=4799489 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_287] (rows=4799489 width=118) + Filter Operator [FIL_271] (rows=4799489 width=118) predicate:((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) - TableScan [TS_6] (rows=14398467 width=118) + TableScan [TS_0] (rows=14398467 width=118) default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_234] (rows=438010 width=124) - Conds:RS_286._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_270] + Merge Join Operator [MERGEJOIN_237] (rows=438010 width=124) + Conds:RS_292._col0=RS_276._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_276] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_269] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + Please refer to the previous Select Operator [SEL_275] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_285] (rows=15996318 width=127) + Select Operator [SEL_291] (rows=15996318 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_284] (rows=15996318 width=239) + Filter Operator [FIL_290] (rows=15996318 width=239) predicate:((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_sold_date_sk is not null and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=239) + TableScan [TS_3] (rows=144002668 width=239) default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_283] - Group By Operator [GBY_282] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_279] - Group By Operator [GBY_276] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_285] + Group By Operator [GBY_282] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_271] (rows=50 width=4) + Select Operator [SEL_277] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_269] + Please refer to the previous Select Operator [SEL_275] diff --git a/ql/src/test/results/clientpositive/perf/tez/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/query5.q.out index 4009b72e59..0508a02782 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query5.q.out @@ -278,13 +278,13 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 11 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 21 <- Reducer 15 (BROADCAST_EDGE), Union 22 (CONTAINS) -Map 23 <- Union 22 (CONTAINS) +Map 22 <- Reducer 15 (BROADCAST_EDGE), Union 23 (CONTAINS) +Map 24 <- Union 23 (CONTAINS) Map 25 <- Reducer 19 (BROADCAST_EDGE), Union 26 (CONTAINS) Map 9 <- Union 2 (CONTAINS) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE) -Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 10 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE) +Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 16 <- Map 10 (SIMPLE_EDGE), Union 26 (SIMPLE_EDGE) @@ -303,235 +303,235 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_309] - Limit [LIM_308] (rows=100 width=619) + File Output Operator [FS_310] + Limit [LIM_309] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_307] (rows=59581 width=619) + Select Operator [SEL_308] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - Select Operator [SEL_305] (rows=59581 width=619) + SHUFFLE [RS_307] + Select Operator [SEL_306] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_304] (rows=59581 width=627) + Group By Operator [GBY_305] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_320] + Reduce Output Operator [RS_321] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_319] (rows=59581 width=627) + Group By Operator [GBY_320] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_318] (rows=39721 width=618) + Top N Key Operator [TNK_319] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_317] (rows=38846 width=619) + Select Operator [SEL_318] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_316] (rows=38846 width=548) + Group By Operator [GBY_317] (rows=38846 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_47] + SHUFFLE [RS_48] PartitionCols:_col0 - Group By Operator [GBY_46] (rows=2835758 width=548) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_227] (rows=34813117 width=535) - Conds:RS_42._col0=RS_315._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Group By Operator [GBY_47] (rows=2835758 width=548) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col4)","sum(_col6)","sum(_col5)","sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_228] (rows=34813117 width=535) + Conds:RS_316._col0=RS_44._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_316] PartitionCols:_col0 - Select Operator [SEL_314] (rows=46000 width=104) + Select Operator [SEL_315] (rows=46000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_313] (rows=46000 width=104) + Filter Operator [FIL_314] (rows=46000 width=104) predicate:cp_catalog_page_sk is not null - TableScan [TS_36] (rows=46000 width=104) + TableScan [TS_25] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_44] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_226] (rows=34813117 width=438) - Conds:Union 22._col1=RS_281._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_227] (rows=34813117 width=438) + Conds:Union 23._col1=RS_282._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_281] + SHUFFLE [RS_282] PartitionCols:_col0 - Select Operator [SEL_278] (rows=8116 width=4) + Select Operator [SEL_279] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_277] (rows=8116 width=98) + Filter Operator [FIL_278] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) TableScan [TS_8] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] vectorized - Reduce Output Operator [RS_333] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] vectorized + Reduce Output Operator [RS_334] PartitionCols:_col1 - Select Operator [SEL_332] (rows=285117694 width=455) + Select Operator [SEL_333] (rows=285117694 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_331] (rows=285117694 width=231) + Filter Operator [FIL_332] (rows=285117694 width=231) predicate:(cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_258] (rows=287989836 width=231) + TableScan [TS_259] (rows=287989836 width=231) Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_330] - Group By Operator [GBY_329] (rows=1 width=12) + BROADCAST [RS_331] + Group By Operator [GBY_330] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] - Group By Operator [GBY_286] (rows=1 width=12) + SHUFFLE [RS_290] + Group By Operator [GBY_287] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_282] (rows=8116 width=4) + Select Operator [SEL_283] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_278] - <-Map 23 [CONTAINS] vectorized - Reduce Output Operator [RS_336] + Please refer to the previous Select Operator [SEL_279] + <-Map 24 [CONTAINS] vectorized + Reduce Output Operator [RS_337] PartitionCols:_col1 - Select Operator [SEL_335] (rows=28221805 width=451) + Select Operator [SEL_336] (rows=28221805 width=451) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_334] (rows=28221805 width=227) + Filter Operator [FIL_335] (rows=28221805 width=227) predicate:(cr_catalog_page_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_263] (rows=28798881 width=227) + TableScan [TS_264] (rows=28798881 width=227) Output:["cr_returned_date_sk","cr_catalog_page_sk","cr_return_amount","cr_net_loss"] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_328] + Reduce Output Operator [RS_329] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_327] (rows=59581 width=627) + Group By Operator [GBY_328] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_326] (rows=39721 width=618) + Top N Key Operator [TNK_327] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_325] (rows=53 width=615) + Select Operator [SEL_326] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_324] (rows=53 width=548) + Group By Operator [GBY_325] (rows=53 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_80] + SHUFFLE [RS_81] PartitionCols:_col0 - Group By Operator [GBY_79] (rows=3498 width=548) + Group By Operator [GBY_80] (rows=3498 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_229] (rows=30966059 width=543) - Conds:RS_75._col0=RS_323._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_230] (rows=30966059 width=543) + Conds:RS_76._col0=RS_324._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + SHUFFLE [RS_324] PartitionCols:_col0 - Select Operator [SEL_322] (rows=84 width=104) + Select Operator [SEL_323] (rows=84 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=84 width=104) + Filter Operator [FIL_322] (rows=84 width=104) predicate:web_site_sk is not null - TableScan [TS_69] (rows=84 width=104) + TableScan [TS_70] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_75] + SHUFFLE [RS_76] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_228] (rows=30966059 width=447) - Conds:Union 26._col1=RS_283._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_229] (rows=30966059 width=447) + Conds:Union 26._col1=RS_284._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_283] + SHUFFLE [RS_284] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_278] + Please refer to the previous Select Operator [SEL_279] <-Union 26 [SIMPLE_EDGE] <-Map 25 [CONTAINS] vectorized - Reduce Output Operator [RS_341] + Reduce Output Operator [RS_342] PartitionCols:_col1 - Select Operator [SEL_340] (rows=143930874 width=455) + Select Operator [SEL_341] (rows=143930874 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_339] (rows=143930874 width=231) - predicate:(ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_268] (rows=144002668 width=231) + Filter Operator [FIL_340] (rows=143930874 width=231) + predicate:(ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_269] (rows=144002668 width=231) Output:["ws_sold_date_sk","ws_web_site_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_338] - Group By Operator [GBY_337] (rows=1 width=12) + BROADCAST [RS_339] + Group By Operator [GBY_338] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] - Group By Operator [GBY_287] (rows=1 width=12) + SHUFFLE [RS_291] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_284] (rows=8116 width=4) + Select Operator [SEL_285] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_278] + Please refer to the previous Select Operator [SEL_279] <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_276] + Reduce Output Operator [RS_277] PartitionCols:_col1 - Select Operator [SEL_274] (rows=134782734 width=454) + Select Operator [SEL_275] (rows=134782734 width=454) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_273] (rows=134782734 width=230) - Conds:RS_344._col0, _col2=RS_347._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_274] (rows=134782734 width=230) + Conds:RS_345._col0, _col2=RS_348._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + SHUFFLE [RS_345] PartitionCols:_col0, _col2 - Select Operator [SEL_343] (rows=143966669 width=11) + Select Operator [SEL_344] (rows=143966669 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=143966669 width=11) + Filter Operator [FIL_343] (rows=143966669 width=11) predicate:(ws_web_site_sk is not null and ws_item_sk is not null and ws_order_number is not null) - TableScan [TS_54] (rows=144002668 width=11) + TableScan [TS_55] (rows=144002668 width=11) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + SHUFFLE [RS_348] PartitionCols:_col1, _col2 - Select Operator [SEL_346] (rows=13749816 width=225) + Select Operator [SEL_347] (rows=13749816 width=225) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_345] (rows=13749816 width=225) + Filter Operator [FIL_346] (rows=13749816 width=225) predicate:(wr_returned_date_sk is not null and wr_item_sk is not null and wr_order_number is not null) - TableScan [TS_57] (rows=14398467 width=225) + TableScan [TS_58] (rows=14398467 width=225) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] vectorized - Reduce Output Operator [RS_303] + Reduce Output Operator [RS_304] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_302] (rows=59581 width=627) + Group By Operator [GBY_303] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_301] (rows=39721 width=618) + Top N Key Operator [TNK_302] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_300] (rows=822 width=617) + Select Operator [SEL_301] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_299] (rows=822 width=548) + Group By Operator [GBY_300] (rows=822 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Group By Operator [GBY_21] (rows=78090 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_225] (rows=64325014 width=376) - Conds:RS_17._col0=RS_298._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_226] (rows=64325014 width=376) + Conds:RS_17._col0=RS_299._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_299] PartitionCols:_col0 - Select Operator [SEL_297] (rows=1704 width=104) + Select Operator [SEL_298] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_296] (rows=1704 width=104) + Filter Operator [FIL_297] (rows=1704 width=104) predicate:s_store_sk is not null TableScan [TS_11] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_224] (rows=64325014 width=277) - Conds:Union 2._col1=RS_279._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_225] (rows=64325014 width=277) + Conds:Union 2._col1=RS_280._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] + SHUFFLE [RS_280] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_278] + Please refer to the previous Select Operator [SEL_279] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_295] + Reduce Output Operator [RS_296] PartitionCols:_col1 - Select Operator [SEL_294] (rows=525329897 width=445) + Select Operator [SEL_295] (rows=525329897 width=445) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_293] (rows=525329897 width=221) + Filter Operator [FIL_294] (rows=525329897 width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_230] (rows=575995635 width=221) + TableScan [TS_231] (rows=575995635 width=221) Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_292] - Group By Operator [GBY_291] (rows=1 width=12) + BROADCAST [RS_293] + Group By Operator [GBY_292] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] - Group By Operator [GBY_285] (rows=1 width=12) + SHUFFLE [RS_289] + Group By Operator [GBY_286] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_280] (rows=8116 width=4) + Select Operator [SEL_281] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_278] + Please refer to the previous Select Operator [SEL_279] <-Map 9 [CONTAINS] vectorized - Reduce Output Operator [RS_312] + Reduce Output Operator [RS_313] PartitionCols:_col1 - Select Operator [SEL_311] (rows=53634860 width=447) + Select Operator [SEL_312] (rows=53634860 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_310] (rows=53634860 width=223) + Filter Operator [FIL_311] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_241] (rows=57591150 width=223) + TableScan [TS_242] (rows=57591150 width=223) Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query50.q.out b/ql/src/test/results/clientpositive/perf/tez/query50.q.out index 72069698c9..6ba945f884 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -127,109 +127,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 10 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_147] - Limit [LIM_146] (rows=100 width=858) + Reducer 6 vectorized + File Output Operator [FS_148] + Limit [LIM_147] (rows=100 width=858) Number of rows:100 - Select Operator [SEL_145] (rows=11945216 width=857) + Select Operator [SEL_146] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] - Group By Operator [GBY_143] (rows=11945216 width=857) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] + Group By Operator [GBY_144] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_29] (rows=11945216 width=857) + Group By Operator [GBY_30] (rows=11945216 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Select Operator [SEL_27] (rows=11945216 width=821) + Select Operator [SEL_28] (rows=11945216 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Top N Key Operator [TNK_59] (rows=11945216 width=821) + Top N Key Operator [TNK_60] (rows=11945216 width=821) keys:_col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21,top n:100 - Merge Join Operator [MERGEJOIN_125] (rows=11945216 width=821) - Conds:RS_24._col8=RS_142._col0(Inner),Output:["_col0","_col5","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] + Merge Join Operator [MERGEJOIN_126] (rows=11945216 width=821) + Conds:RS_25._col3=RS_143._col0(Inner),Output:["_col0","_col5","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_141] (rows=1704 width=821) + Select Operator [SEL_142] (rows=1704 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_140] (rows=1704 width=821) + Filter Operator [FIL_141] (rows=1704 width=821) predicate:s_store_sk is not null - TableScan [TS_12] (rows=1704 width=821) + TableScan [TS_16] (rows=1704 width=821) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_124] (rows=11945216 width=3) - Conds:RS_21._col5=RS_139._col0(Inner),Output:["_col0","_col5","_col8"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_125] (rows=11945216 width=3) + Conds:RS_22._col0=RS_140._col0(Inner),Output:["_col0","_col3","_col5"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_138] (rows=73049 width=4) + Select Operator [SEL_139] (rows=73049 width=4) Output:["_col0"] - Filter Operator [FIL_137] (rows=73049 width=4) + Filter Operator [FIL_138] (rows=73049 width=4) predicate:d_date_sk is not null - TableScan [TS_9] (rows=73049 width=4) + TableScan [TS_13] (rows=73049 width=4) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_123] (rows=11945216 width=3) - Conds:RS_18._col1, _col2, _col3=RS_136._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_18] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_124] (rows=11945216 width=3) + Conds:RS_137._col1, _col2, _col4=RS_20._col1, _col2, _col3(Inner),Output:["_col0","_col3","_col5"] + <-Reducer 8 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_20] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_122] (rows=1339446 width=8) - Conds:RS_128._col0=RS_131._col0(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] - PartitionCols:_col0 - Select Operator [SEL_127] (rows=53632139 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_126] (rows=53632139 width=15) - predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null and sr_item_sk is not null) - TableScan [TS_0] (rows=57591150 width=15) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] + Merge Join Operator [MERGEJOIN_123] (rows=1339446 width=8) + Conds:RS_129._col0=RS_132._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] PartitionCols:_col0 - Select Operator [SEL_130] (rows=50 width=4) + Select Operator [SEL_131] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_129] (rows=50 width=12) + Filter Operator [FIL_130] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=53632139 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_127] (rows=53632139 width=15) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null and sr_item_sk is not null) + TableScan [TS_3] (rows=57591150 width=15) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_137] PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_135] (rows=501694138 width=19) + Select Operator [SEL_136] (rows=501694138 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_134] (rows=501694138 width=19) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_ticket_number BETWEEN DynamicValue(RS_18_store_returns_sr_ticket_number_min) AND DynamicValue(RS_18_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_18_store_returns_sr_ticket_number_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=19) + Filter Operator [FIL_135] (rows=501694138 width=19) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_ticket_number BETWEEN DynamicValue(RS_20_store_returns_sr_ticket_number_min) AND DynamicValue(RS_20_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_20_store_returns_sr_ticket_number_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=19) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_133] - Group By Operator [GBY_132] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_134] + Group By Operator [GBY_133] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_99] - Group By Operator [GBY_98] (rows=1 width=12) + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_75] + Group By Operator [GBY_74] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_97] (rows=1339446 width=8) + Select Operator [SEL_73] (rows=1339446 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_122] + Please refer to the previous Merge Join Operator [MERGEJOIN_123] diff --git a/ql/src/test/results/clientpositive/perf/tez/query53.q.out b/ql/src/test/results/clientpositive/perf/tez/query53.q.out index 2d2c0c374e..c318f8cd31 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query53.q.out @@ -65,102 +65,102 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_107] - Limit [LIM_106] (rows=25 width=228) + Reducer 4 vectorized + File Output Operator [FS_108] + Limit [LIM_107] (rows=25 width=228) Number of rows:100 - Select Operator [SEL_105] (rows=25 width=228) + Select Operator [SEL_106] (rows=25 width=228) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_33] - Select Operator [SEL_30] (rows=25 width=228) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_34] + Select Operator [SEL_31] (rows=25 width=228) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_52] (rows=25 width=228) + Top N Key Operator [TNK_53] (rows=25 width=228) keys:avg_window_0, _col2, _col0,top n:100 - Filter Operator [FIL_46] (rows=25 width=228) + Filter Operator [FIL_47] (rows=25 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END - Select Operator [SEL_29] (rows=50 width=116) + Select Operator [SEL_30] (rows=50 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_28] (rows=50 width=116) + PTF Operator [PTF_29] (rows=50 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_25] (rows=50 width=116) + Select Operator [SEL_26] (rows=50 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_24] (rows=50 width=120) + Group By Operator [GBY_25] (rows=50 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=50 width=120) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col5, _col7 - Merge Join Operator [MERGEJOIN_85] (rows=98800 width=8) - Conds:RS_18._col2=RS_102._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] + Group By Operator [GBY_23] (rows=50 width=120) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col4)"],keys:_col6, _col8 + Merge Join Operator [MERGEJOIN_86] (rows=98800 width=8) + Conds:RS_89._col0=RS_20._col2(Inner),Output:["_col4","_col6","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_89] PartitionCols:_col0 - Select Operator [SEL_101] (rows=1704 width=4) + Select Operator [SEL_88] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_100] (rows=1704 width=4) + Filter Operator [FIL_87] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=4) + TableScan [TS_0] (rows=1704 width=4) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_84] (rows=98800 width=8) - Conds:RS_15._col0=RS_99._col0(Inner),Output:["_col2","_col3","_col5","_col7"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_99] + Merge Join Operator [MERGEJOIN_85] (rows=98800 width=8) + Conds:RS_15._col0=RS_103._col0(Inner),Output:["_col2","_col3","_col5","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_98] (rows=317 width=8) + Select Operator [SEL_102] (rows=317 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_97] (rows=317 width=12) + Filter Operator [FIL_101] (rows=317 width=12) predicate:((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=12) + TableScan [TS_9] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_qoy"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_83] (rows=569118 width=4) - Conds:RS_96._col1=RS_88._col0(Inner),Output:["_col0","_col2","_col3","_col5"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_88] + Merge Join Operator [MERGEJOIN_84] (rows=569118 width=4) + Conds:RS_100._col1=RS_92._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_87] (rows=52 width=8) + Select Operator [SEL_91] (rows=52 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_86] (rows=52 width=290) + Filter Operator [FIL_90] (rows=52 width=290) predicate:((i_class) IN ('personal', 'portable', 'reference', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=289) + TableScan [TS_6] (rows=462000 width=289) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] PartitionCols:_col1 - Select Operator [SEL_95] (rows=525329897 width=118) + Select Operator [SEL_99] (rows=525329897 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_94] (rows=525329897 width=118) + Filter Operator [FIL_98] (rows=525329897 width=118) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) + TableScan [TS_3] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_93] - Group By Operator [GBY_92] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_97] + Group By Operator [GBY_96] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_91] - Group By Operator [GBY_90] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_95] + Group By Operator [GBY_94] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_89] (rows=52 width=4) + Select Operator [SEL_93] (rows=52 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_87] + Please refer to the previous Select Operator [SEL_91] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index ec9de83f19..ceaedcb8fd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[275][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[275][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[276][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,17 +133,17 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 16 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 22 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) +Map 19 <- Reducer 25 (BROADCAST_EDGE), Union 20 (CONTAINS) +Map 23 <- Reducer 25 (BROADCAST_EDGE), Union 20 (CONTAINS) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) -Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 21 <- Map 24 (SIMPLE_EDGE), Union 20 (SIMPLE_EDGE) +Reducer 22 <- Map 26 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) Reducer 28 <- Map 27 (SIMPLE_EDGE) Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) @@ -163,266 +163,266 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_360] - Limit [LIM_359] (rows=1 width=16) + File Output Operator [FS_361] + Limit [LIM_360] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_358] (rows=1 width=16) + Select Operator [SEL_359] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - Select Operator [SEL_356] (rows=1 width=16) + SHUFFLE [RS_358] + Select Operator [SEL_357] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_355] (rows=1 width=12) + Top N Key Operator [TNK_356] (rows=1 width=12) keys:_col0, _col1,top n:100 - Group By Operator [GBY_354] (rows=1 width=12) + Group By Operator [GBY_355] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] + SHUFFLE [RS_354] PartitionCols:_col0 - Group By Operator [GBY_352] (rows=1 width=12) + Group By Operator [GBY_353] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_351] (rows=1 width=116) + Select Operator [SEL_352] (rows=1 width=116) Output:["_col0"] - Top N Key Operator [TNK_350] (rows=1 width=116) + Top N Key Operator [TNK_351] (rows=1 width=116) keys:UDFToInteger((_col1 / 50)),top n:100 - Group By Operator [GBY_349] (rows=1 width=116) + Group By Operator [GBY_350] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_118] + SHUFFLE [RS_119] PartitionCols:_col0 - Group By Operator [GBY_117] (rows=17670 width=116) + Group By Operator [GBY_118] (rows=17670 width=116) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col10 - Select Operator [SEL_116] (rows=36481068608 width=123) + Select Operator [SEL_117] (rows=36481068608 width=123) Output:["_col2","_col10"] - Filter Operator [FIL_115] (rows=36481068608 width=123) + Filter Operator [FIL_116] (rows=36481068608 width=123) predicate:(_col4 <= _col15) - Merge Join Operator [MERGEJOIN_276] (rows=109443205825 width=123) + Merge Join Operator [MERGEJOIN_277] (rows=109443205825 width=123) Conds:(Inner),Output:["_col2","_col4","_col10","_col15"] <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Group By Operator [GBY_347] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_349] + Group By Operator [GBY_348] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] + SHUFFLE [RS_329] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=25 width=4) + Group By Operator [GBY_325] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_320] (rows=50 width=12) + Select Operator [SEL_321] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_316] (rows=50 width=12) + Filter Operator [FIL_317] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) - TableScan [TS_50] (rows=73049 width=12) + TableScan [TS_51] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Filter Operator [FIL_111] (rows=4377728233 width=123) + PARTITION_ONLY_SHUFFLE [RS_113] + Filter Operator [FIL_112] (rows=4377728233 width=123) predicate:(_col14 <= _col4) - Merge Join Operator [MERGEJOIN_275] (rows=13133184700 width=123) + Merge Join Operator [MERGEJOIN_276] (rows=13133184700 width=123) Conds:(Inner),Output:["_col2","_col4","_col10","_col14"] <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_346] - Group By Operator [GBY_345] (rows=25 width=4) + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_346] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + SHUFFLE [RS_328] PartitionCols:_col0 - Group By Operator [GBY_323] (rows=25 width=4) + Group By Operator [GBY_324] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_319] (rows=50 width=12) + Select Operator [SEL_320] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_315] (rows=50 width=12) + Filter Operator [FIL_316] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_month_seq is not null) - Please refer to the previous TableScan [TS_50] + Please refer to the previous TableScan [TS_51] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_108] - Merge Join Operator [MERGEJOIN_274] (rows=525327388 width=114) + PARTITION_ONLY_SHUFFLE [RS_109] + Merge Join Operator [MERGEJOIN_275] (rows=525327388 width=114) Conds:(Inner),Output:["_col2","_col4","_col10"] <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Select Operator [SEL_343] (rows=1 width=8) - Filter Operator [FIL_342] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_345] + Select Operator [SEL_344] (rows=1 width=8) + Filter Operator [FIL_343] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_341] (rows=1 width=8) + Group By Operator [GBY_342] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_339] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_341] + Group By Operator [GBY_340] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_338] (rows=25 width=4) - Group By Operator [GBY_337] (rows=25 width=4) + Select Operator [SEL_339] (rows=25 width=4) + Group By Operator [GBY_338] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + SHUFFLE [RS_327] PartitionCols:_col0 - Group By Operator [GBY_322] (rows=25 width=4) + Group By Operator [GBY_323] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_318] (rows=50 width=12) + Select Operator [SEL_319] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_314] (rows=50 width=12) + Filter Operator [FIL_315] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3)) - Please refer to the previous TableScan [TS_50] + Please refer to the previous TableScan [TS_51] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_105] - Merge Join Operator [MERGEJOIN_273] (rows=525327388 width=114) + PARTITION_ONLY_SHUFFLE [RS_106] + Merge Join Operator [MERGEJOIN_274] (rows=525327388 width=114) Conds:(Inner),Output:["_col2","_col4","_col10"] <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - Select Operator [SEL_335] (rows=1 width=8) - Filter Operator [FIL_334] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_337] + Select Operator [SEL_336] (rows=1 width=8) + Filter Operator [FIL_335] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_333] (rows=1 width=8) + Group By Operator [GBY_334] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_332] - Group By Operator [GBY_331] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_333] + Group By Operator [GBY_332] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_330] (rows=25 width=4) - Group By Operator [GBY_329] (rows=25 width=4) + Select Operator [SEL_331] (rows=25 width=4) + Group By Operator [GBY_330] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] + SHUFFLE [RS_326] PartitionCols:_col0 - Group By Operator [GBY_321] (rows=25 width=4) + Group By Operator [GBY_322] (rows=25 width=4) Output:["_col0"],keys:_col0 - Select Operator [SEL_317] (rows=50 width=12) + Select Operator [SEL_318] (rows=50 width=12) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_314] + Please refer to the previous Filter Operator [FIL_315] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_102] - Merge Join Operator [MERGEJOIN_272] (rows=525327388 width=114) - Conds:RS_99._col1=RS_100._col5(Inner),Output:["_col2","_col4","_col10"] + PARTITION_ONLY_SHUFFLE [RS_103] + Merge Join Operator [MERGEJOIN_273] (rows=525327388 width=114) + Conds:RS_100._col1=RS_101._col5(Inner),Output:["_col2","_col4","_col10"] <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_100] + SHUFFLE [RS_101] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_271] (rows=55046 width=4) - Conds:RS_46._col0=RS_313._col1(Inner),Output:["_col5"] + Merge Join Operator [MERGEJOIN_272] (rows=55046 width=4) + Conds:RS_47._col0=RS_314._col1(Inner),Output:["_col5"] <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_46] + SHUFFLE [RS_47] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_267] (rows=39720279 width=4) - Conds:RS_295._col1, _col2=RS_298._col0, _col1(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_268] (rows=39720279 width=4) + Conds:RS_296._col1, _col2=RS_299._col0, _col1(Inner),Output:["_col0"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] + SHUFFLE [RS_296] PartitionCols:_col1, _col2 - Select Operator [SEL_294] (rows=40000000 width=188) + Select Operator [SEL_295] (rows=40000000 width=188) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_293] (rows=40000000 width=188) + Filter Operator [FIL_294] (rows=40000000 width=188) predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) TableScan [TS_6] (rows=40000000 width=188) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_299] PartitionCols:_col0, _col1 - Select Operator [SEL_297] (rows=1704 width=184) + Select Operator [SEL_298] (rows=1704 width=184) Output:["_col0","_col1"] - Filter Operator [FIL_296] (rows=1704 width=184) + Filter Operator [FIL_297] (rows=1704 width=184) predicate:(s_county is not null and s_state is not null) TableScan [TS_9] (rows=1704 width=184) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] + <-Reducer 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_314] PartitionCols:_col1 - Select Operator [SEL_312] (rows=55046 width=8) + Select Operator [SEL_313] (rows=55046 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_311] (rows=55046 width=8) + Group By Operator [GBY_312] (rows=55046 width=8) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col0, _col1 - Group By Operator [GBY_39] (rows=55046 width=8) - Output:["_col0","_col1"],keys:_col6, _col5 - Merge Join Operator [MERGEJOIN_270] (rows=110092 width=8) - Conds:RS_35._col1=RS_310._col0(Inner),Output:["_col5","_col6"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + Group By Operator [GBY_40] (rows=55046 width=8) + Output:["_col0","_col1"],keys:_col1, _col0 + Merge Join Operator [MERGEJOIN_271] (rows=110092 width=8) + Conds:RS_302._col0=RS_37._col1(Inner),Output:["_col0","_col1"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] PartitionCols:_col0 - Select Operator [SEL_309] (rows=80000000 width=8) + Select Operator [SEL_301] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_308] (rows=80000000 width=8) + Filter Operator [FIL_300] (rows=80000000 width=8) predicate:(c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_26] (rows=80000000 width=8) + TableScan [TS_12] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_35] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_269] (rows=110092 width=0) - Conds:RS_32._col2=RS_307._col0(Inner),Output:["_col1"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Merge Join Operator [MERGEJOIN_270] (rows=110092 width=0) + Conds:RS_32._col2=RS_311._col0(Inner),Output:["_col1"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] PartitionCols:_col0 - Select Operator [SEL_306] (rows=453 width=4) + Select Operator [SEL_310] (rows=453 width=4) Output:["_col0"] - Filter Operator [FIL_305] (rows=453 width=186) + Filter Operator [FIL_309] (rows=453 width=186) predicate:((i_class = 'consignment') and (i_category = 'Jewelry') and i_item_sk is not null) - TableScan [TS_23] (rows=462000 width=186) + TableScan [TS_26] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_268] (rows=11665117 width=7) - Conds:Union 17._col0=RS_301._col0(Inner),Output:["_col1","_col2"] - <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_301] + Merge Join Operator [MERGEJOIN_269] (rows=11665117 width=7) + Conds:Union 20._col0=RS_305._col0(Inner),Output:["_col1","_col2"] + <-Map 24 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_305] PartitionCols:_col0 - Select Operator [SEL_300] (rows=50 width=4) + Select Operator [SEL_304] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_299] (rows=50 width=12) + Filter Operator [FIL_303] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) - TableScan [TS_20] (rows=73049 width=12) + TableScan [TS_23] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_366] + <-Union 20 [SIMPLE_EDGE] + <-Map 19 [CONTAINS] vectorized + Reduce Output Operator [RS_367] PartitionCols:_col0 - Select Operator [SEL_365] (rows=285117831 width=11) + Select Operator [SEL_366] (rows=285117831 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_364] (rows=285117831 width=11) + Filter Operator [FIL_365] (rows=285117831 width=11) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_277] (rows=287989836 width=11) + TableScan [TS_278] (rows=287989836 width=11) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_363] + Group By Operator [GBY_362] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_303] (rows=1 width=12) + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_308] + Group By Operator [GBY_307] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_302] (rows=50 width=4) + Select Operator [SEL_306] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_300] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_369] + Please refer to the previous Select Operator [SEL_304] + <-Map 23 [CONTAINS] vectorized + Reduce Output Operator [RS_370] PartitionCols:_col0 - Select Operator [SEL_368] (rows=143930993 width=11) + Select Operator [SEL_369] (rows=143930993 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_367] (rows=143930993 width=11) + Filter Operator [FIL_368] (rows=143930993 width=11) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_282] (rows=144002668 width=11) + TableScan [TS_283] (rows=144002668 width=11) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_363] - Please refer to the previous Group By Operator [GBY_361] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_364] + Please refer to the previous Group By Operator [GBY_362] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_99] + SHUFFLE [RS_100] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_266] (rows=525327388 width=114) - Conds:RS_289._col0=RS_292._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_267] (rows=525327388 width=114) + Conds:RS_290._col0=RS_293._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_290] PartitionCols:_col0 - Select Operator [SEL_288] (rows=525327388 width=114) + Select Operator [SEL_289] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_287] (rows=525327388 width=114) + Filter Operator [FIL_288] (rows=525327388 width=114) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] + SHUFFLE [RS_293] PartitionCols:_col0 - Select Operator [SEL_291] (rows=73049 width=8) + Select Operator [SEL_292] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_290] (rows=73049 width=8) + Filter Operator [FIL_291] (rows=73049 width=8) predicate:(d_date_sk is not null and d_month_seq is not null) TableScan [TS_3] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out index a7558ba16a..d7512e8721 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out @@ -149,27 +149,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 18 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Map 27 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 20 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -177,226 +177,220 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_359] - Limit [LIM_358] (rows=100 width=212) + File Output Operator [FS_362] + Limit [LIM_361] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_357] (rows=355 width=212) + Select Operator [SEL_360] (rows=355 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_356] - Top N Key Operator [TNK_355] (rows=355 width=212) + SHUFFLE [RS_359] + Top N Key Operator [TNK_358] (rows=355 width=212) keys:_col1,top n:100 - Group By Operator [GBY_354] (rows=355 width=212) + Group By Operator [GBY_357] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_375] + Reduce Output Operator [RS_378] PartitionCols:_col0 - Group By Operator [GBY_374] (rows=355 width=212) + Group By Operator [GBY_377] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_373] (rows=355 width=212) + Group By Operator [GBY_376] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_109] + SHUFFLE [RS_112] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=355 width=212) + Group By Operator [GBY_111] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=172427 width=188) - Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_308] (rows=172427 width=188) + Conds:RS_107._col0=RS_108._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_104] + SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_294] (rows=15609 width=104) - Conds:RS_320._col1=RS_326._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_297] (rows=15609 width=104) + Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_323] PartitionCols:_col1 - Select Operator [SEL_319] (rows=462000 width=104) + Select Operator [SEL_322] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_318] (rows=462000 width=104) + Filter Operator [FIL_321] (rows=462000 width=104) predicate:(i_item_id is not null and i_item_sk is not null) TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_326] + SHUFFLE [RS_329] PartitionCols:_col0 - Group By Operator [GBY_325] (rows=10500 width=100) + Group By Operator [GBY_328] (rows=10500 width=100) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + SHUFFLE [RS_327] PartitionCols:_col0 - Group By Operator [GBY_323] (rows=10500 width=100) + Group By Operator [GBY_326] (rows=10500 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_322] (rows=21000 width=189) + Select Operator [SEL_325] (rows=21000 width=189) Output:["i_item_id"] - Filter Operator [FIL_321] (rows=21000 width=189) + Filter Operator [FIL_324] (rows=21000 width=189) predicate:((i_color) IN ('orchid', 'chiffon', 'lace') and i_item_id is not null) TableScan [TS_3] (rows=462000 width=189) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_108] PartitionCols:_col2 - Select Operator [SEL_100] (rows=788222 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_302] (rows=788222 width=110) - Conds:RS_97._col2=RS_350._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] - PartitionCols:_col0 - Select Operator [SEL_347] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_346] (rows=8000000 width=112) - predicate:((ca_gmt_offset = -8) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_97] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_301] (rows=3941109 width=118) - Conds:RS_372._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_333] - PartitionCols:_col0 - Select Operator [SEL_328] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_327] (rows=50 width=12) - predicate:((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_372] - PartitionCols:_col0 - Select Operator [SEL_371] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_370] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_85] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_369] - Group By Operator [GBY_368] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_337] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_334] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_305] (rows=788222 width=110) + Conds:RS_334._col0=RS_101._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0 + Select Operator [SEL_331] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_330] (rows=8000000 width=112) + predicate:((ca_gmt_offset = -8) and ca_address_sk is not null) + TableScan [TS_10] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_304] (rows=3941109 width=118) + Conds:RS_375._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + PartitionCols:_col0 + Select Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_335] (rows=50 width=12) + predicate:((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) + TableScan [TS_16] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_375] + PartitionCols:_col0 + Select Operator [SEL_374] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_373] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_90] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_372] + Group By Operator [GBY_371] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_342] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_353] + Reduce Output Operator [RS_356] PartitionCols:_col0 - Group By Operator [GBY_352] (rows=355 width=212) + Group By Operator [GBY_355] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_351] (rows=355 width=212) + Group By Operator [GBY_354] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_34] + SHUFFLE [RS_35] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=355 width=212) + Group By Operator [GBY_34] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=629332 width=100) - Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_306] (rows=629332 width=100) + Conds:RS_30._col0=RS_31._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col2 - Select Operator [SEL_25] (rows=2876890 width=4) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_296] (rows=2876890 width=4) - Conds:RS_22._col2=RS_348._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_347] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_295] (rows=14384447 width=4) - Conds:RS_345._col0=RS_329._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_328] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] - PartitionCols:_col0 - Select Operator [SEL_344] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_343] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_342] - Group By Operator [GBY_341] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_335] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_330] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_299] (rows=2876890 width=4) + Conds:RS_332._col0=RS_24._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_298] (rows=14384447 width=4) + Conds:RS_353._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] + PartitionCols:_col0 + Select Operator [SEL_352] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_351] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_13] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_350] + Group By Operator [GBY_349] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_346] + Group By Operator [GBY_343] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_338] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_367] + Reduce Output Operator [RS_370] PartitionCols:_col0 - Group By Operator [GBY_366] (rows=355 width=212) + Group By Operator [GBY_369] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_365] (rows=355 width=212) + Group By Operator [GBY_368] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_71] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=355 width=212) + Group By Operator [GBY_72] (rows=355 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=339151 width=100) - Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_307] (rows=339151 width=100) + Conds:RS_68._col0=RS_69._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_66] + SHUFFLE [RS_68] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Please refer to the previous Merge Join Operator [MERGEJOIN_297] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_69] PartitionCols:_col3 - Select Operator [SEL_62] (rows=1550375 width=13) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_299] (rows=1550375 width=13) - Conds:RS_59._col1=RS_349._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_347] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=7751872 width=98) - Conds:RS_364._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_328] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_364] - PartitionCols:_col0 - Select Operator [SEL_363] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_362] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_47] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_361] - Group By Operator [GBY_360] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Group By Operator [GBY_336] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_332] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_328] + Merge Join Operator [MERGEJOIN_302] (rows=1550375 width=13) + Conds:RS_333._col0=RS_62._col1(Inner),Output:["_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_333] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_331] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_301] (rows=7751872 width=98) + Conds:RS_367._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_367] + PartitionCols:_col0 + Select Operator [SEL_366] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_365] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_51] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_364] + Group By Operator [GBY_363] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_340] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] diff --git a/ql/src/test/results/clientpositive/perf/tez/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/query57.q.out index 72048de519..205cc19361 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query57.q.out @@ -105,158 +105,158 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Reducer 5 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Map 10 <- Reducer 14 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_322] - Limit [LIM_321] (rows=100 width=758) + Reducer 6 vectorized + File Output Operator [FS_326] + Limit [LIM_325] (rows=100 width=758) Number of rows:100 - Select Operator [SEL_320] (rows=397735 width=758) + Select Operator [SEL_324] (rows=397735 width=758) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_110] - Select Operator [SEL_109] (rows=397735 width=758) + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_114] + Select Operator [SEL_113] (rows=397735 width=758) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Top N Key Operator [TNK_178] (rows=397735 width=646) - keys:(_col10 - _col11), _col8,top n:100 - Merge Join Operator [MERGEJOIN_279] (rows=397735 width=646) - Conds:RS_106._col5, _col6, _col12, _col7=RS_307._col0, _col1, _col4, _col2(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] + Top N Key Operator [TNK_182] (rows=397735 width=646) + keys:(_col15 - _col16), _col13,top n:100 + Merge Join Operator [MERGEJOIN_283] (rows=397735 width=646) + Conds:RS_311._col0, _col1, _col4, _col2=RS_111._col5, _col6, _col12, _col7(Inner),Output:["_col3","_col8","_col10","_col11","_col13","_col14","_col15","_col16"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_311] PartitionCols:_col0, _col1, _col4, _col2 - Select Operator [SEL_305] (rows=2386410 width=404) + Select Operator [SEL_309] (rows=2386410 width=404) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_303] (rows=2386410 width=408) + Filter Operator [FIL_307] (rows=2386410 width=408) predicate:rank_window_0 is not null - PTF Operator [PTF_301] (rows=2386410 width=408) + PTF Operator [PTF_305] (rows=2386410 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] - Select Operator [SEL_300] (rows=2386410 width=408) + Select Operator [SEL_304] (rows=2386410 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] PartitionCols:_col1, _col0, _col4 - Group By Operator [GBY_297] (rows=2386410 width=408) + Group By Operator [GBY_301] (rows=2386410 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_93] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_92] (rows=87441185 width=408) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col10, _col11, _col5, _col6, _col8 - Merge Join Operator [MERGEJOIN_277] (rows=87441185 width=406) - Conds:RS_88._col2=RS_296._col0(Inner),Output:["_col3","_col5","_col6","_col8","_col10","_col11"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_296] + Group By Operator [GBY_23] (rows=87441185 width=408) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)"],keys:_col1, _col2, _col8, _col9, _col11 + Merge Join Operator [MERGEJOIN_275] (rows=87441185 width=406) + Conds:RS_286._col0=RS_20._col2(Inner),Output:["_col1","_col2","_col6","_col8","_col9","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_295] (rows=462000 width=194) + Select Operator [SEL_285] (rows=462000 width=194) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_294] (rows=462000 width=194) + Filter Operator [FIL_284] (rows=462000 width=194) predicate:(i_item_sk is not null and i_category is not null and i_brand is not null) - TableScan [TS_79] (rows=462000 width=194) + TableScan [TS_0] (rows=462000 width=194) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_category"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_88] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_276] (rows=87441185 width=220) - Conds:RS_85._col1=RS_293._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + Merge Join Operator [MERGEJOIN_274] (rows=87441185 width=220) + Conds:RS_15._col1=RS_300._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col8"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] PartitionCols:_col0 - Select Operator [SEL_292] (rows=60 width=102) + Select Operator [SEL_299] (rows=60 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=60 width=102) + Filter Operator [FIL_298] (rows=60 width=102) predicate:(cc_call_center_sk is not null and cc_name is not null) - TableScan [TS_76] (rows=60 width=102) + TableScan [TS_9] (rows=60 width=102) default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_85] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_275] (rows=87441185 width=126) - Conds:RS_290._col0=RS_282._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] + Merge Join Operator [MERGEJOIN_273] (rows=87441185 width=126) + Conds:RS_297._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_281] (rows=564 width=12) + Select Operator [SEL_288] (rows=564 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_280] (rows=564 width=12) + Filter Operator [FIL_287] (rows=564 width=12) predicate:((d_year) IN (2000, 1999, 2001) and ((d_year = 2000) or (struct(d_year,d_moy)) IN (const struct(1999,12), const struct(2001,1))) and d_date_sk is not null) - TableScan [TS_73] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_289] (rows=285117980 width=123) + Select Operator [SEL_296] (rows=285117980 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_288] (rows=285117980 width=123) - predicate:(cs_sold_date_sk is not null and cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_83_date_dim_d_date_sk_min) AND DynamicValue(RS_83_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_83_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_70] (rows=287989836 width=123) + Filter Operator [FIL_295] (rows=285117980 width=123) + predicate:(cs_sold_date_sk is not null and cs_call_center_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_item_sk","cs_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_294] + Group By Operator [GBY_293] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_292] + Group By Operator [GBY_291] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_283] (rows=564 width=4) + Select Operator [SEL_290] (rows=564 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_281] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_106] + Please refer to the previous Select Operator [SEL_288] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_111] PartitionCols:_col5, _col6, _col12, _col7 - Merge Join Operator [MERGEJOIN_278] (rows=397735 width=636) - Conds:RS_308._col0, _col1, _col4, _col2=RS_319._col0, _col1, _col7, _col2(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] + Merge Join Operator [MERGEJOIN_282] (rows=397735 width=636) + Conds:RS_312._col0, _col1, _col4, _col2=RS_323._col0, _col1, _col7, _col2(Inner),Output:["_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_312] PartitionCols:_col0, _col1, _col4, _col2 - Select Operator [SEL_306] (rows=2386410 width=404) + Select Operator [SEL_310] (rows=2386410 width=404) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_304] (rows=2386410 width=408) + Filter Operator [FIL_308] (rows=2386410 width=408) predicate:rank_window_0 is not null - PTF Operator [PTF_302] (rows=2386410 width=408) + PTF Operator [PTF_306] (rows=2386410 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS LAST, _col3 ASC NULLS LAST","partition by:":"_col1, _col0, _col4"}] - Please refer to the previous Select Operator [SEL_300] - <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + Please refer to the previous Select Operator [SEL_304] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] PartitionCols:_col0, _col1, _col7, _col2 - Select Operator [SEL_318] (rows=397735 width=524) + Select Operator [SEL_322] (rows=397735 width=524) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_317] (rows=397735 width=524) + Filter Operator [FIL_321] (rows=397735 width=524) predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (false) END - Select Operator [SEL_316] (rows=795470 width=520) + Select Operator [SEL_320] (rows=795470 width=520) Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_315] (rows=795470 width=520) + Filter Operator [FIL_319] (rows=795470 width=520) predicate:((_col0 > 0) and rank_window_1 is not null and (_col3 = 2000)) - PTF Operator [PTF_314] (rows=2386410 width=520) + PTF Operator [PTF_318] (rows=2386410 width=520) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS LAST, _col4 ASC NULLS LAST","partition by:":"_col2, _col1, _col5"}] - Select Operator [SEL_313] (rows=2386410 width=520) + Select Operator [SEL_317] (rows=2386410 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_316] PartitionCols:_col1, _col0, _col4 - Select Operator [SEL_311] (rows=2386410 width=408) + Select Operator [SEL_315] (rows=2386410 width=408) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_310] (rows=2386410 width=408) + PTF Operator [PTF_314] (rows=2386410 width=408) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col1, _col0, _col4, _col2"}] - Select Operator [SEL_309] (rows=2386410 width=408) + Select Operator [SEL_313] (rows=2386410 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_299] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_303] PartitionCols:_col1, _col0, _col4, _col2 - Please refer to the previous Group By Operator [GBY_297] + Please refer to the previous Group By Operator [GBY_301] diff --git a/ql/src/test/results/clientpositive/perf/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tez/query58.q.out index a66122d80d..6c3c33037b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query58.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[405][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[408][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 23' is a cross product PREHOOK: query: explain with ss_items as (select i_item_id item_id @@ -150,15 +150,15 @@ Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Map 27 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 18 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 19 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 25 (CUSTOM_SIMPLE_EDGE), Reducer 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE), Reducer 25 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) @@ -171,231 +171,231 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_471] - Limit [LIM_470] (rows=1 width=884) + File Output Operator [FS_474] + Limit [LIM_473] (rows=1 width=884) Number of rows:100 - Select Operator [SEL_469] (rows=1 width=884) + Select Operator [SEL_472] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_163] - Select Operator [SEL_162] (rows=1 width=884) + SHUFFLE [RS_166] + Select Operator [SEL_165] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_260] (rows=1 width=1108) + Top N Key Operator [TNK_263] (rows=1 width=1108) keys:_col0, _col5,top n:100 - Filter Operator [FIL_158] (rows=1 width=1108) + Filter Operator [FIL_161] (rows=1 width=1108) predicate:(_col9 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col6 AND _col7 and _col1 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col10 AND _col11) - Merge Join Operator [MERGEJOIN_420] (rows=1 width=1108) - Conds:RS_155._col0=RS_468._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_423] (rows=1 width=1108) + Conds:RS_158._col0=RS_471._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col9","_col10","_col11"] <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_468] + SHUFFLE [RS_471] PartitionCols:_col0 - Select Operator [SEL_467] (rows=69 width=436) + Select Operator [SEL_470] (rows=69 width=436) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_466] (rows=69 width=212) + Group By Operator [GBY_469] (rows=69 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_147] + SHUFFLE [RS_150] PartitionCols:_col0 - Group By Operator [GBY_146] (rows=69 width=212) + Group By Operator [GBY_149] (rows=69 width=212) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_418] (rows=31537 width=100) - Conds:RS_142._col0=RS_143._col0(Inner),Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_421] (rows=31537 width=100) + Conds:RS_145._col0=RS_146._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_143] + SHUFFLE [RS_146] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_407] (rows=2 width=4) - Conds:RS_423._col1=RS_439._col0(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_410] (rows=2 width=4) + Conds:RS_426._col1=RS_442._col0(Inner),Output:["_col0"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_423] + SHUFFLE [RS_426] PartitionCols:_col1 - Select Operator [SEL_422] (rows=73049 width=98) + Select Operator [SEL_425] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_421] (rows=73049 width=98) + Filter Operator [FIL_424] (rows=73049 width=98) predicate:(d_date is not null and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_439] + <-Reducer 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_442] PartitionCols:_col0 - Group By Operator [GBY_438] (rows=2 width=94) + Group By Operator [GBY_441] (rows=2 width=94) Output:["_col0"],keys:KEY._col0 - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_32] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0 - Group By Operator [GBY_31] (rows=2 width=94) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_406] (rows=5 width=94) - Conds:RS_27._col1=RS_436._col1(Inner),Output:["_col2"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_436] + Group By Operator [GBY_32] (rows=2 width=94) + Output:["_col0"],keys:_col0 + Merge Join Operator [MERGEJOIN_409] (rows=5 width=94) + Conds:RS_431._col1=RS_29._col1(Inner),Output:["_col0"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_431] PartitionCols:_col1 - Select Operator [SEL_434] (rows=73049 width=98) + Select Operator [SEL_429] (rows=73049 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_432] (rows=73049 width=98) + Filter Operator [FIL_427] (rows=73049 width=98) predicate:(d_week_seq is not null and d_date is not null) - TableScan [TS_21] (rows=73049 width=98) + TableScan [TS_9] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date","d_week_seq"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_27] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_405] (rows=1 width=4) + Merge Join Operator [MERGEJOIN_408] (rows=1 width=4) Conds:(Inner),Output:["_col1"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_437] - Select Operator [SEL_435] (rows=1 width=4) + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_432] + Select Operator [SEL_430] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_433] (rows=1 width=98) + Filter Operator [FIL_428] (rows=1 width=98) predicate:((d_date = '1998-02-19') and d_week_seq is not null) - Please refer to the previous TableScan [TS_21] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_431] - Select Operator [SEL_430] (rows=1 width=8) - Filter Operator [FIL_429] (rows=1 width=8) + Please refer to the previous TableScan [TS_9] + <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_440] + Select Operator [SEL_439] (rows=1 width=8) + Filter Operator [FIL_438] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_428] (rows=1 width=8) + Group By Operator [GBY_437] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_427] - Group By Operator [GBY_426] (rows=1 width=8) + <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_436] + Group By Operator [GBY_435] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_425] (rows=1 width=94) - Filter Operator [FIL_424] (rows=1 width=94) + Select Operator [SEL_434] (rows=1 width=94) + Filter Operator [FIL_433] (rows=1 width=94) predicate:(d_date = '1998-02-19') - TableScan [TS_9] (rows=73049 width=94) + TableScan [TS_12] (rows=73049 width=94) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_142] + SHUFFLE [RS_145] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_412] (rows=143966864 width=215) - Conds:RS_465._col1=RS_449._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_415] (rows=143966864 width=215) + Conds:RS_468._col1=RS_452._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_449] + SHUFFLE [RS_452] PartitionCols:_col0 - Select Operator [SEL_446] (rows=462000 width=104) + Select Operator [SEL_449] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_445] (rows=462000 width=104) + Filter Operator [FIL_448] (rows=462000 width=104) predicate:(i_item_sk is not null and i_item_id is not null) TableScan [TS_3] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_465] + SHUFFLE [RS_468] PartitionCols:_col1 - Select Operator [SEL_464] (rows=143966864 width=119) + Select Operator [SEL_467] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_463] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_143_date_dim_d_date_sk_min) AND DynamicValue(RS_143_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_143_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_100] (rows=144002668 width=119) + Filter Operator [FIL_466] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_146_date_dim_d_date_sk_min) AND DynamicValue(RS_146_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_146_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_102] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_sales_price"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_462] - Group By Operator [GBY_461] (rows=1 width=12) + BROADCAST [RS_465] + Group By Operator [GBY_464] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_364] - Group By Operator [GBY_363] (rows=1 width=12) + SHUFFLE [RS_367] + Group By Operator [GBY_366] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_362] (rows=2 width=4) + Select Operator [SEL_365] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_407] + Please refer to the previous Merge Join Operator [MERGEJOIN_410] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_155] + SHUFFLE [RS_158] PartitionCols:_col0 - Filter Operator [FIL_153] (rows=1 width=772) + Filter Operator [FIL_156] (rows=1 width=772) predicate:(_col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col2 AND _col3) - Merge Join Operator [MERGEJOIN_419] (rows=68 width=772) - Conds:RS_452._col0=RS_460._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_422] (rows=68 width=772) + Conds:RS_455._col0=RS_463._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_460] + SHUFFLE [RS_463] PartitionCols:_col0 - Select Operator [SEL_459] (rows=69 width=436) + Select Operator [SEL_462] (rows=69 width=436) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_458] (rows=69 width=212) + Group By Operator [GBY_461] (rows=69 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_97] + SHUFFLE [RS_99] PartitionCols:_col0 - Group By Operator [GBY_96] (rows=69 width=212) + Group By Operator [GBY_98] (rows=69 width=212) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_417] (rows=120498 width=100) - Conds:RS_92._col0=RS_93._col0(Inner),Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_420] (rows=120498 width=100) + Conds:RS_94._col0=RS_95._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_93] + SHUFFLE [RS_95] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_407] + Please refer to the previous Merge Join Operator [MERGEJOIN_410] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_92] + SHUFFLE [RS_94] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_408] (rows=550076554 width=210) - Conds:RS_457._col1=RS_448._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_411] (rows=550076554 width=210) + Conds:RS_460._col1=RS_451._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_448] + SHUFFLE [RS_451] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_446] + Please refer to the previous Select Operator [SEL_449] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_457] + SHUFFLE [RS_460] PartitionCols:_col1 - Select Operator [SEL_456] (rows=550076554 width=114) + Select Operator [SEL_459] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_455] (rows=550076554 width=114) - predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_93_date_dim_d_date_sk_min) AND DynamicValue(RS_93_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_93_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_50] (rows=575995635 width=114) + Filter Operator [FIL_458] (rows=550076554 width=114) + predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_51] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_454] - Group By Operator [GBY_453] (rows=1 width=12) + BROADCAST [RS_457] + Group By Operator [GBY_456] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_318] - Group By Operator [GBY_317] (rows=1 width=12) + SHUFFLE [RS_321] + Group By Operator [GBY_320] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_316] (rows=2 width=4) + Select Operator [SEL_319] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_407] + Please refer to the previous Merge Join Operator [MERGEJOIN_410] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_452] + SHUFFLE [RS_455] PartitionCols:_col0 - Select Operator [SEL_451] (rows=68 width=436) + Select Operator [SEL_454] (rows=68 width=436) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_450] (rows=68 width=212) + Group By Operator [GBY_453] (rows=68 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] + SHUFFLE [RS_48] PartitionCols:_col0 - Group By Operator [GBY_46] (rows=68 width=212) + Group By Operator [GBY_47] (rows=68 width=212) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col4 - Merge Join Operator [MERGEJOIN_416] (rows=62327 width=100) - Conds:RS_42._col0=RS_43._col0(Inner),Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_419] (rows=62327 width=100) + Conds:RS_43._col0=RS_44._col0(Inner),Output:["_col2","_col4"] <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_43] + SHUFFLE [RS_44] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_407] + Please refer to the previous Merge Join Operator [MERGEJOIN_410] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_43] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_404] (rows=286549727 width=215) - Conds:RS_444._col1=RS_447._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_407] (rows=286549727 width=215) + Conds:RS_447._col1=RS_450._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_447] + SHUFFLE [RS_450] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_446] + Please refer to the previous Select Operator [SEL_449] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_444] + SHUFFLE [RS_447] PartitionCols:_col1 - Select Operator [SEL_443] (rows=286549727 width=119) + Select Operator [SEL_446] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_442] (rows=286549727 width=119) - predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) + Filter Operator [FIL_445] (rows=286549727 width=119) + predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_44_date_dim_d_date_sk_min) AND DynamicValue(RS_44_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_44_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_441] - Group By Operator [GBY_440] (rows=1 width=12) + BROADCAST [RS_444] + Group By Operator [GBY_443] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_268] - Group By Operator [GBY_267] (rows=1 width=12) + SHUFFLE [RS_271] + Group By Operator [GBY_270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_266] (rows=2 width=4) + Select Operator [SEL_269] (rows=2 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_407] + Please refer to the previous Merge Join Operator [MERGEJOIN_410] diff --git a/ql/src/test/results/clientpositive/perf/tez/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/query59.q.out index 1a2ba964f4..e2ca12bc9c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query59.q.out @@ -95,146 +95,146 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 13 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_212] - Limit [LIM_211] (rows=100 width=976) + Reducer 4 vectorized + File Output Operator [FS_214] + Limit [LIM_213] (rows=100 width=976) Number of rows:100 - Select Operator [SEL_210] (rows=1012347 width=976) + Select Operator [SEL_212] (rows=1012347 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_59] - Select Operator [SEL_58] (rows=1012347 width=976) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_61] + Select Operator [SEL_60] (rows=1012347 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_97] (rows=1012347 width=1648) - keys:_col12, _col11, _col0,top n:100 - Merge Join Operator [MERGEJOIN_184] (rows=1012347 width=1648) - Conds:RS_55._col11, _col0=RS_56._col1, (_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_56] + Top N Key Operator [TNK_99] (rows=1012347 width=1648) + keys:_col2, _col1, _col3,top n:100 + Merge Join Operator [MERGEJOIN_186] (rows=1012347 width=1648) + Conds:RS_57._col1, _col3=RS_58._col1, (_col0 - 52)(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_58] PartitionCols:_col1, (_col0 - 52) - Select Operator [SEL_48] (rows=28847 width=776) + Select Operator [SEL_53] (rows=28847 width=776) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_183] (rows=28847 width=776) - Conds:RS_45._col1=RS_209._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] + Merge Join Operator [MERGEJOIN_185] (rows=28847 width=776) + Conds:RS_211._col0=RS_51._col1(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] + SHUFFLE [RS_211] PartitionCols:_col0 - Select Operator [SEL_208] (rows=1704 width=104) + Select Operator [SEL_210] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=1704 width=104) + Filter Operator [FIL_209] (rows=1704 width=104) predicate:(s_store_sk is not null and s_store_id is not null) - TableScan [TS_39] (rows=1704 width=104) + TableScan [TS_25] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_45] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_51] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_182] (rows=28847 width=676) - Conds:RS_206._col0=RS_201._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_184] (rows=28847 width=676) + Conds:RS_208._col0=RS_206._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_206] PartitionCols:_col0 - Select Operator [SEL_199] (rows=317 width=4) + Select Operator [SEL_204] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_197] (rows=317 width=8) + Filter Operator [FIL_202] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) - TableScan [TS_15] (rows=73049 width=8) + TableScan [TS_18] (rows=73049 width=8) default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] PartitionCols:_col0 - Group By Operator [GBY_205] (rows=1196832 width=679) + Group By Operator [GBY_207] (rows=1196832 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_33] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col0, _col1 - Group By Operator [GBY_32] (rows=525329897 width=679) + Group By Operator [GBY_39] (rows=525329897 width=679) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_30] (rows=525329897 width=138) + Select Operator [SEL_37] (rows=525329897 width=138) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_181] (rows=525329897 width=138) - Conds:RS_188._col0=RS_193._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] - PartitionCols:_col0 - Select Operator [SEL_186] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_185] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + Merge Join Operator [MERGEJOIN_183] (rows=525329897 width=138) + Conds:RS_193._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_191] (rows=73049 width=36) + Select Operator [SEL_196] (rows=73049 width=36) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_189] (rows=73049 width=99) + Filter Operator [FIL_194] (rows=73049 width=99) predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_3] (rows=73049 width=99) + TableScan [TS_6] (rows=73049 width=99) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col11, _col0 - Merge Join Operator [MERGEJOIN_180] (rows=28847 width=976) - Conds:RS_52._col1=RS_204._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] + PartitionCols:_col0 + Select Operator [SEL_191] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_190] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_182] (rows=28847 width=976) + Conds:RS_189._col0=RS_55._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_203] (rows=1704 width=192) + Select Operator [SEL_188] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_202] (rows=1704 width=192) + Filter Operator [FIL_187] (rows=1704 width=192) predicate:(s_store_sk is not null and s_store_id is not null) - TableScan [TS_18] (rows=1704 width=192) + TableScan [TS_0] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=28847 width=788) - Conds:RS_195._col0=RS_200._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + Merge Join Operator [MERGEJOIN_181] (rows=28847 width=788) + Conds:RS_200._col0=RS_205._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] PartitionCols:_col0 - Select Operator [SEL_198] (rows=317 width=4) + Select Operator [SEL_203] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_196] (rows=317 width=8) + Filter Operator [FIL_201] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + Please refer to the previous TableScan [TS_18] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] PartitionCols:_col0 - Group By Operator [GBY_194] (rows=1196832 width=791) + Group By Operator [GBY_199] (rows=1196832 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_15] PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=525329897 width=791) + Group By Operator [GBY_14] (rows=525329897 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_9] (rows=525329897 width=142) + Select Operator [SEL_12] (rows=525329897 width=142) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_178] (rows=525329897 width=142) - Conds:RS_187._col0=RS_192._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + Merge Join Operator [MERGEJOIN_180] (rows=525329897 width=142) + Conds:RS_192._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_186] - <-Map 12 [SIMPLE_EDGE] vectorized + Select Operator [SEL_195] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Please refer to the previous Filter Operator [FIL_194] + <-Map 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_192] PartitionCols:_col0 - Select Operator [SEL_190] (rows=73049 width=36) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Please refer to the previous Filter Operator [FIL_189] + Please refer to the previous Select Operator [SEL_191] diff --git a/ql/src/test/results/clientpositive/perf/tez/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/query6.q.out index 9bfd383644..a955f0e5b6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query6.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[173][bigTable=?] in task 'Reducer 15' is a cross product +Warning: Map Join MAPJOIN[175][bigTable=?] in task 'Reducer 17' is a cross product PREHOOK: query: explain select a.ca_state state, count(*) cnt from customer_address a @@ -64,179 +64,179 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 3 (BROADCAST_EDGE) -Map 16 <- Reducer 15 (BROADCAST_EDGE) -Map 6 <- Map 1 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Map 1 <- Map 6 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE) +Map 6 <- Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 2 (SIMPLE_EDGE) -Reducer 4 <- Map 2 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 8 <- Map 16 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_238] - Limit [LIM_237] (rows=1 width=94) + Reducer 5 vectorized + File Output Operator [FS_240] + Limit [LIM_239] (rows=1 width=94) Number of rows:100 - Select Operator [SEL_236] (rows=1 width=94) + Select Operator [SEL_238] (rows=1 width=94) Output:["_col0","_col1"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] - Top N Key Operator [TNK_234] (rows=1 width=94) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + Top N Key Operator [TNK_236] (rows=1 width=94) keys:_col1,top n:100 - Filter Operator [FIL_233] (rows=1 width=94) + Filter Operator [FIL_235] (rows=1 width=94) predicate:(_col1 >= 10L) - Group By Operator [GBY_232] (rows=5 width=94) + Group By Operator [GBY_234] (rows=5 width=94) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_72] PartitionCols:_col0 - Group By Operator [GBY_69] (rows=15 width=94) + Group By Operator [GBY_71] (rows=15 width=94) Output:["_col0","_col1"],aggregations:["count()"],keys:_col9 - Merge Join Operator [MERGEJOIN_176] (rows=7192227 width=86) - Conds:RS_65._col4=RS_216._col0(Inner),Output:["_col9"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + Merge Join Operator [MERGEJOIN_178] (rows=7192227 width=86) + Conds:RS_67._col1=RS_218._col0(Inner),Output:["_col9"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_218] PartitionCols:_col0 - Select Operator [SEL_215] (rows=153611 width=227) + Select Operator [SEL_217] (rows=153611 width=227) Output:["_col0"] - Filter Operator [FIL_214] (rows=153611 width=227) - predicate:(_col4 > _col1) - Map Join Operator [MAPJOIN_213] (rows=460833 width=227) - Conds:RS_210._col0=SEL_212._col2(Inner),Output:["_col1","_col3","_col4"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_210] + Filter Operator [FIL_216] (rows=153611 width=227) + predicate:(_col1 > _col4) + Map Join Operator [MAPJOIN_215] (rows=460833 width=227) + Conds:SEL_214._col2=RS_212._col0(Inner),Output:["_col0","_col1","_col4"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_212] PartitionCols:_col0 - Map Join Operator [MAPJOIN_209] (rows=10 width=202) + Map Join Operator [MAPJOIN_211] (rows=10 width=202) Conds:(Inner),Output:["_col0","_col1"] - <-Reducer 5 [BROADCAST_EDGE] vectorized - BROADCAST [RS_205] - Select Operator [SEL_204] (rows=1 width=8) - Filter Operator [FIL_203] (rows=1 width=8) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_207] + Select Operator [SEL_206] (rows=1 width=8) + Filter Operator [FIL_205] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_202] (rows=1 width=8) + Group By Operator [GBY_204] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_201] - Group By Operator [GBY_200] (rows=1 width=8) + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_203] + Group By Operator [GBY_202] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_199] (rows=25 width=4) - Group By Operator [GBY_198] (rows=25 width=4) + Select Operator [SEL_201] (rows=25 width=4) + Group By Operator [GBY_200] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 2 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] PartitionCols:_col0 - Group By Operator [GBY_186] (rows=25 width=4) + Group By Operator [GBY_188] (rows=25 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_184] (rows=50 width=12) + Select Operator [SEL_186] (rows=50 width=12) Output:["d_month_seq"] - Filter Operator [FIL_182] (rows=50 width=12) + Filter Operator [FIL_184] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 2)) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Select Operator [SEL_208] (rows=10 width=202) + <-Select Operator [SEL_210] (rows=10 width=202) Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=10 width=210) + Filter Operator [FIL_209] (rows=10 width=210) predicate:CAST( CAST( (_col1 / _col2) AS decimal(11,6)) AS decimal(16,6)) is not null - Group By Operator [GBY_206] (rows=10 width=210) + Group By Operator [GBY_208] (rows=10 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_197] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] PartitionCols:_col0 - Group By Operator [GBY_196] (rows=10 width=210) + Group By Operator [GBY_198] (rows=10 width=210) Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category - Filter Operator [FIL_195] (rows=462000 width=201) + Filter Operator [FIL_197] (rows=462000 width=201) predicate:i_category is not null - TableScan [TS_23] (rows=462000 width=201) + TableScan [TS_30] (rows=462000 width=201) default@item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_current_price","i_category"] - <-Select Operator [SEL_212] (rows=460833 width=205) + <-Select Operator [SEL_214] (rows=460833 width=205) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_211] (rows=460833 width=205) + Filter Operator [FIL_213] (rows=460833 width=205) predicate:(i_current_price is not null and i_item_sk is not null and i_category is not null) - TableScan [TS_45] (rows=462000 width=205) + TableScan [TS_27] (rows=462000 width=205) default@item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_category"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_175] (rows=7192227 width=90) - Conds:RS_225._col5=RS_63._col0(Inner),Output:["_col4","_col9"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - PartitionCols:_col5 - Map Join Operator [MAPJOIN_224] (rows=7192227 width=4) - Conds:RS_194._col0=SEL_223._col0(Inner),Output:["_col4","_col5"] - <-Map 1 [BROADCAST_EDGE] vectorized - BROADCAST [RS_194] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_177] (rows=7192227 width=90) + Conds:RS_227._col2=RS_65._col0(Inner),Output:["_col1","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_226] (rows=7192227 width=4) + Conds:SEL_225._col0=RS_196._col0(Inner),Output:["_col1","_col2"] + <-Map 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_196] PartitionCols:_col0 - Map Join Operator [MAPJOIN_193] (rows=660 width=4) - Conds:SEL_192._col1=RS_190._col0(Inner),Output:["_col0"] - <-Reducer 3 [BROADCAST_EDGE] vectorized - BROADCAST [RS_190] + Map Join Operator [MAPJOIN_195] (rows=660 width=4) + Conds:SEL_194._col1=RS_192._col0(Inner),Output:["_col0"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_192] PartitionCols:_col0 - Group By Operator [GBY_189] (rows=25 width=4) + Group By Operator [GBY_191] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 2 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] PartitionCols:_col0 - Group By Operator [GBY_185] (rows=25 width=4) + Group By Operator [GBY_187] (rows=25 width=4) Output:["_col0"],keys:d_month_seq - Select Operator [SEL_183] (rows=50 width=12) + Select Operator [SEL_185] (rows=50 width=12) Output:["d_month_seq"] - Filter Operator [FIL_181] (rows=50 width=12) + Filter Operator [FIL_183] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 2) and d_month_seq is not null) - Please refer to the previous TableScan [TS_3] - <-Select Operator [SEL_192] (rows=73049 width=8) + Please refer to the previous TableScan [TS_6] + <-Select Operator [SEL_194] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_191] (rows=73049 width=8) + Filter Operator [FIL_193] (rows=73049 width=8) predicate:(d_date_sk is not null and d_month_seq is not null) - TableScan [TS_0] (rows=73049 width=8) + TableScan [TS_3] (rows=73049 width=8) default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Select Operator [SEL_223] (rows=525327388 width=11) + <-Select Operator [SEL_225] (rows=525327388 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_222] (rows=525327388 width=11) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_66_i_i_item_sk_min) AND DynamicValue(RS_66_i_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_66_i_i_item_sk_bloom_filter))) - TableScan [TS_10] (rows=575995635 width=11) + Filter Operator [FIL_224] (rows=525327388 width=11) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_68_i_i_item_sk_min) AND DynamicValue(RS_68_i_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_68_i_i_item_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=11) default@store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_221] - Group By Operator [GBY_220] (rows=1 width=12) + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_223] + Group By Operator [GBY_222] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + Group By Operator [GBY_220] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_217] (rows=153611 width=4) + Select Operator [SEL_219] (rows=153611 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_215] + Please refer to the previous Select Operator [SEL_217] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_63] + SHUFFLE [RS_65] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_172] (rows=80000000 width=90) - Conds:RS_228._col1=RS_231._col0(Inner),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_174] (rows=80000000 width=90) + Conds:RS_230._col1=RS_233._col0(Inner),Output:["_col0","_col3"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] + SHUFFLE [RS_230] PartitionCols:_col1 - Select Operator [SEL_227] (rows=80000000 width=8) + Select Operator [SEL_229] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_226] (rows=80000000 width=8) + Filter Operator [FIL_228] (rows=80000000 width=8) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_13] (rows=80000000 width=8) + TableScan [TS_17] (rows=80000000 width=8) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] + SHUFFLE [RS_233] PartitionCols:_col0 - Select Operator [SEL_230] (rows=40000000 width=90) + Select Operator [SEL_232] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=40000000 width=90) + Filter Operator [FIL_231] (rows=40000000 width=90) predicate:ca_address_sk is not null - TableScan [TS_16] (rows=40000000 width=90) + TableScan [TS_20] (rows=40000000 width=90) default@customer_address,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query60.q.out b/ql/src/test/results/clientpositive/perf/tez/query60.q.out index e6692f514a..792de5821c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query60.q.out @@ -169,27 +169,27 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 14 <- Reducer 18 (BROADCAST_EDGE) -Map 26 <- Reducer 21 (BROADCAST_EDGE) -Map 27 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Map 18 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Map 27 <- Reducer 25 (BROADCAST_EDGE) +Reducer 10 <- Reducer 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 13 <- Map 12 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 16 <- Map 14 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 17 <- Map 14 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 22 <- Map 17 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 23 <- Map 25 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 20 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) +Reducer 25 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 2 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 8 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 5 (CONTAINS) Stage-0 @@ -197,232 +197,226 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_368] - Limit [LIM_367] (rows=100 width=212) + File Output Operator [FS_371] + Limit [LIM_370] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_366] (rows=1717 width=212) + Select Operator [SEL_369] (rows=1717 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_365] - Top N Key Operator [TNK_364] (rows=1717 width=212) + SHUFFLE [RS_368] + Top N Key Operator [TNK_367] (rows=1717 width=212) keys:_col0, _col1,top n:100 - Group By Operator [GBY_363] (rows=1717 width=212) + Group By Operator [GBY_366] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_386] + Reduce Output Operator [RS_389] PartitionCols:_col0 - Group By Operator [GBY_385] (rows=1717 width=212) + Group By Operator [GBY_388] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_384] (rows=5151 width=212) + Top N Key Operator [TNK_387] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_383] (rows=1717 width=212) + Group By Operator [GBY_386] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_109] + SHUFFLE [RS_112] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=1717 width=212) + Group By Operator [GBY_111] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_310] (rows=379339 width=201) - Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_313] (rows=379339 width=201) + Conds:RS_107._col0=RS_108._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_104] + SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_299] (rows=34340 width=104) - Conds:RS_328._col1=RS_334._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_302] (rows=34340 width=104) + Conds:RS_331._col1=RS_337._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] + SHUFFLE [RS_331] PartitionCols:_col1 - Select Operator [SEL_327] (rows=462000 width=104) + Select Operator [SEL_330] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_326] (rows=462000 width=104) + Filter Operator [FIL_329] (rows=462000 width=104) predicate:(i_item_id is not null and i_item_sk is not null) TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] + SHUFFLE [RS_337] PartitionCols:_col0 - Group By Operator [GBY_333] (rows=23100 width=100) + Group By Operator [GBY_336] (rows=23100 width=100) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + SHUFFLE [RS_335] PartitionCols:_col0 - Group By Operator [GBY_331] (rows=23100 width=100) + Group By Operator [GBY_334] (rows=23100 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_330] (rows=46200 width=190) + Select Operator [SEL_333] (rows=46200 width=190) Output:["i_item_id"] - Filter Operator [FIL_329] (rows=46200 width=190) + Filter Operator [FIL_332] (rows=46200 width=190) predicate:((i_category = 'Children') and i_item_id is not null) TableScan [TS_3] (rows=462000 width=190) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_105] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_108] PartitionCols:_col2 - Select Operator [SEL_100] (rows=788222 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_307] (rows=788222 width=110) - Conds:RS_97._col2=RS_358._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_358] - PartitionCols:_col0 - Select Operator [SEL_355] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_354] (rows=8000000 width=112) - predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_97] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_306] (rows=3941109 width=118) - Conds:RS_382._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_341] - PartitionCols:_col0 - Select Operator [SEL_336] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_335] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_382] - PartitionCols:_col0 - Select Operator [SEL_381] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_380] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_85] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_379] - Group By Operator [GBY_378] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Group By Operator [GBY_345] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_342] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_336] + Merge Join Operator [MERGEJOIN_310] (rows=788222 width=110) + Conds:RS_342._col0=RS_101._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_342] + PartitionCols:_col0 + Select Operator [SEL_339] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_338] (rows=8000000 width=112) + predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) + TableScan [TS_10] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_309] (rows=3941109 width=118) + Conds:RS_385._col0=RS_349._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_349] + PartitionCols:_col0 + Select Operator [SEL_344] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_343] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) + TableScan [TS_16] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_385] + PartitionCols:_col0 + Select Operator [SEL_384] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_383] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_90] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_382] + Group By Operator [GBY_381] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_356] + Group By Operator [GBY_353] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_350] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_344] <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_362] + Reduce Output Operator [RS_365] PartitionCols:_col0 - Group By Operator [GBY_361] (rows=1717 width=212) + Group By Operator [GBY_364] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_360] (rows=5151 width=212) + Top N Key Operator [TNK_363] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_359] (rows=1717 width=212) + Group By Operator [GBY_362] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_34] + SHUFFLE [RS_35] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=1717 width=212) + Group By Operator [GBY_34] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_308] (rows=1384530 width=100) - Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_311] (rows=1384530 width=100) + Conds:RS_30._col0=RS_31._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_299] - <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_302] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col2 - Select Operator [SEL_25] (rows=2876890 width=4) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_301] (rows=2876890 width=4) - Conds:RS_22._col2=RS_356._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_356] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_355] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_300] (rows=14384447 width=4) - Conds:RS_353._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_336] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] - PartitionCols:_col0 - Select Operator [SEL_352] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_351] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_350] - Group By Operator [GBY_349] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_346] - Group By Operator [GBY_343] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_338] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_336] + Merge Join Operator [MERGEJOIN_304] (rows=2876890 width=4) + Conds:RS_340._col0=RS_24._col2(Inner),Output:["_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_340] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_339] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_303] (rows=14384447 width=4) + Conds:RS_361._col0=RS_345._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_345] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_344] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_361] + PartitionCols:_col0 + Select Operator [SEL_360] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_359] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_13] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_358] + Group By Operator [GBY_357] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_354] + Group By Operator [GBY_351] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_346] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_344] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_377] + Reduce Output Operator [RS_380] PartitionCols:_col0 - Group By Operator [GBY_376] (rows=1717 width=212) + Group By Operator [GBY_379] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_375] (rows=5151 width=212) + Top N Key Operator [TNK_378] (rows=5151 width=212) keys:_col0,top n:100 - Group By Operator [GBY_374] (rows=1717 width=212) + Group By Operator [GBY_377] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_71] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=1717 width=212) + Group By Operator [GBY_72] (rows=1717 width=212) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_309] (rows=746132 width=100) - Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_312] (rows=746132 width=100) + Conds:RS_68._col0=RS_69._col3(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_66] + SHUFFLE [RS_68] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_299] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Please refer to the previous Merge Join Operator [MERGEJOIN_302] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_69] PartitionCols:_col3 - Select Operator [SEL_62] (rows=1550375 width=13) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_304] (rows=1550375 width=13) - Conds:RS_59._col1=RS_357._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_355] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=7751872 width=98) - Conds:RS_373._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_336] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_373] - PartitionCols:_col0 - Select Operator [SEL_372] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_371] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_47] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_370] - Group By Operator [GBY_369] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_347] - Group By Operator [GBY_344] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_340] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_336] + Merge Join Operator [MERGEJOIN_307] (rows=1550375 width=13) + Conds:RS_341._col0=RS_62._col1(Inner),Output:["_col3","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_341] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_339] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_306] (rows=7751872 width=98) + Conds:RS_376._col0=RS_347._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 20 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_344] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_376] + PartitionCols:_col0 + Select Operator [SEL_375] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_374] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_58_date_dim_d_date_sk_min) AND DynamicValue(RS_58_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_58_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_51] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_373] + Group By Operator [GBY_372] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_355] + Group By Operator [GBY_352] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_348] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_344] diff --git a/ql/src/test/results/clientpositive/perf/tez/query63.q.out b/ql/src/test/results/clientpositive/perf/tez/query63.q.out index 6d7a54a808..2c16f1d0d2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query63.q.out @@ -67,102 +67,102 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_107] - Limit [LIM_106] (rows=65 width=228) + Reducer 4 vectorized + File Output Operator [FS_108] + Limit [LIM_107] (rows=65 width=228) Number of rows:100 - Select Operator [SEL_105] (rows=65 width=228) + Select Operator [SEL_106] (rows=65 width=228) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_33] - Select Operator [SEL_30] (rows=65 width=228) + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_34] + Select Operator [SEL_31] (rows=65 width=228) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_52] (rows=65 width=228) + Top N Key Operator [TNK_53] (rows=65 width=228) keys:_col0, avg_window_0, _col2,top n:100 - Filter Operator [FIL_46] (rows=65 width=228) + Filter Operator [FIL_47] (rows=65 width=228) predicate:CASE WHEN ((avg_window_0 > 0)) THEN (((abs((_col2 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (false) END - Select Operator [SEL_29] (rows=130 width=116) + Select Operator [SEL_30] (rows=130 width=116) Output:["avg_window_0","_col0","_col2"] - PTF Operator [PTF_28] (rows=130 width=116) + PTF Operator [PTF_29] (rows=130 width=116) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST","partition by:":"_col0"}] - Select Operator [SEL_25] (rows=130 width=116) + Select Operator [SEL_26] (rows=130 width=116) Output:["_col0","_col2"] - Group By Operator [GBY_24] (rows=130 width=120) + Group By Operator [GBY_25] (rows=130 width=120) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=130 width=120) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col5, _col7 - Merge Join Operator [MERGEJOIN_85] (rows=98800 width=8) - Conds:RS_18._col2=RS_102._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] + Group By Operator [GBY_23] (rows=130 width=120) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col4)"],keys:_col6, _col8 + Merge Join Operator [MERGEJOIN_86] (rows=98800 width=8) + Conds:RS_89._col0=RS_20._col2(Inner),Output:["_col4","_col6","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_89] PartitionCols:_col0 - Select Operator [SEL_101] (rows=1704 width=4) + Select Operator [SEL_88] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_100] (rows=1704 width=4) + Filter Operator [FIL_87] (rows=1704 width=4) predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=4) + TableScan [TS_0] (rows=1704 width=4) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_84] (rows=98800 width=8) - Conds:RS_15._col0=RS_99._col0(Inner),Output:["_col2","_col3","_col5","_col7"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_99] + Merge Join Operator [MERGEJOIN_85] (rows=98800 width=8) + Conds:RS_15._col0=RS_103._col0(Inner),Output:["_col2","_col3","_col5","_col7"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_98] (rows=317 width=8) + Select Operator [SEL_102] (rows=317 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_97] (rows=317 width=12) + Filter Operator [FIL_101] (rows=317 width=12) predicate:((d_month_seq) IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=12) + TableScan [TS_9] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_moy"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_83] (rows=569118 width=4) - Conds:RS_96._col1=RS_88._col0(Inner),Output:["_col0","_col2","_col3","_col5"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_88] + Merge Join Operator [MERGEJOIN_84] (rows=569118 width=4) + Conds:RS_100._col1=RS_92._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_87] (rows=52 width=8) + Select Operator [SEL_91] (rows=52 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_86] (rows=52 width=290) + Filter Operator [FIL_90] (rows=52 width=290) predicate:((i_class) IN ('personal', 'portable', 'refernece', 'self-help', 'accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9', 'amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1') and (i_category) IN ('Books', 'Children', 'Electronics', 'Women', 'Music', 'Men') and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=289) + TableScan [TS_6] (rows=462000 width=289) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] PartitionCols:_col1 - Select Operator [SEL_95] (rows=525329897 width=118) + Select Operator [SEL_99] (rows=525329897 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_94] (rows=525329897 width=118) + Filter Operator [FIL_98] (rows=525329897 width=118) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) + TableScan [TS_3] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_93] - Group By Operator [GBY_92] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_97] + Group By Operator [GBY_96] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_91] - Group By Operator [GBY_90] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_95] + Group By Operator [GBY_94] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_89] (rows=52 width=4) + Select Operator [SEL_93] (rows=52 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_87] + Please refer to the previous Select Operator [SEL_91] diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index d8d3d34f72..2da97e8052 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,526 +265,522 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 38 <- Reducer 25 (BROADCAST_EDGE) -Map 44 <- Reducer 41 (BROADCAST_EDGE) -Map 54 <- Reducer 33 (BROADCAST_EDGE) -Map 55 <- Reducer 43 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Map 42 <- Reducer 26 (BROADCAST_EDGE) +Map 48 <- Reducer 45 (BROADCAST_EDGE) +Map 54 <- Reducer 34 (BROADCAST_EDGE) +Map 55 <- Reducer 47 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 37 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Reducer 32 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 14 <- Map 53 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 36 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 20 <- Map 50 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 34 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 51 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 37 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 52 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 25 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 17 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 49 (SIMPLE_EDGE) -Reducer 28 <- Map 50 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 34 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 51 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 37 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 32 <- Map 52 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 33 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 34 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) -Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) -Reducer 41 <- Map 40 (CUSTOM_SIMPLE_EDGE) -Reducer 42 <- Map 40 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) -Reducer 43 <- Map 40 (CUSTOM_SIMPLE_EDGE) -Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) -Reducer 46 <- Reducer 45 (SIMPLE_EDGE) -Reducer 48 <- Map 47 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) -Reducer 49 <- Reducer 48 (SIMPLE_EDGE) -Reducer 5 <- Map 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 53 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 24 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 53 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 36 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 15 <- Map 18 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) +Reducer 17 <- Map 38 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE) +Reducer 21 <- Map 41 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 35 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 40 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 38 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 39 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 18 (SIMPLE_EDGE), Reducer 46 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 29 <- Map 41 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 37 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 35 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 40 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 38 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 39 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 37 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 43 <- Map 42 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) +Reducer 45 <- Map 44 (CUSTOM_SIMPLE_EDGE) +Reducer 46 <- Map 44 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) +Reducer 47 <- Map 44 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 50 <- Reducer 49 (SIMPLE_EDGE) +Reducer 52 <- Map 51 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) +Reducer 53 <- Reducer 52 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 8 <- Reducer 25 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 33 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 12 vectorized - File Output Operator [FS_1216] - Select Operator [SEL_1215] (rows=114089652126 width=1702) + Reducer 6 vectorized + File Output Operator [FS_1224] + Select Operator [SEL_1223] (rows=114089652126 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_259] - Select Operator [SEL_258] (rows=114089652126 width=1694) + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_271] + Select Operator [SEL_270] (rows=114089652126 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_257] (rows=114089652126 width=1694) + Filter Operator [FIL_269] (rows=114089652126 width=1694) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_1111] (rows=342268956379 width=1694) - Conds:RS_1195._col2, _col1, _col3=RS_1214._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] - <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1195] - PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1194] (rows=23886447 width=1354) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Filter Operator [FIL_1193] (rows=23886447 width=1362) + Merge Join Operator [MERGEJOIN_1119] (rows=342268956379 width=1694) + Conds:RS_1203._col2, _col1, _col3=RS_1222._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1222] + PartitionCols:_col1, _col0, _col2 + Select Operator [SEL_1221] (rows=23886447 width=525) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_1220] (rows=23886447 width=1362) predicate:_col14 is not null - Select Operator [SEL_1192] (rows=23886447 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_1191] (rows=23886447 width=1362) + Select Operator [SEL_1219] (rows=23886447 width=1362) + Output:["_col1","_col2","_col3","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_1218] (rows=23886447 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_122] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_261] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_121] (rows=23886447 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col42)","sum(_col43)","sum(_col44)"],keys:_col28, _col45, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col46 - Merge Join Operator [MERGEJOIN_1092] (rows=24224230 width=1153) - Conds:RS_117._col31=RS_1139._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col42","_col43","_col44","_col45","_col46"] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1139] + Group By Operator [GBY_260] (rows=23886447 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col44)","sum(_col45)","sum(_col46)"],keys:_col30, _col47, _col31, _col9, _col11, _col16, _col17, _col18, _col19, _col25, _col26, _col27, _col28, _col48 + Merge Join Operator [MERGEJOIN_1118] (rows=24224230 width=1153) + Conds:RS_256._col33=RS_1153._col0(Inner),Output:["_col9","_col11","_col16","_col17","_col18","_col19","_col25","_col26","_col27","_col28","_col30","_col31","_col44","_col45","_col46","_col47","_col48"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1153] PartitionCols:_col0 - Select Operator [SEL_1137] (rows=20 width=4) + Select Operator [SEL_1150] (rows=20 width=4) Output:["_col0"] - Filter Operator [FIL_1136] (rows=20 width=4) + Filter Operator [FIL_1149] (rows=20 width=4) predicate:ib_income_band_sk is not null - TableScan [TS_12] (rows=20 width=4) + TableScan [TS_15] (rows=20 width=4) default@income_band,ib2,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_117] - PartitionCols:_col31 - Filter Operator [FIL_116] (rows=24224230 width=1327) - predicate:(_col50 <> _col19) - Merge Join Operator [MERGEJOIN_1091] (rows=24224230 width=1327) - Conds:RS_113._col36=RS_1148._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col42","_col43","_col44","_col45","_col46","_col50"] - <-Map 53 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1148] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_256] + PartitionCols:_col33 + Filter Operator [FIL_255] (rows=24224230 width=1327) + predicate:(_col1 <> _col21) + Merge Join Operator [MERGEJOIN_1117] (rows=24224230 width=1327) + Conds:RS_1124._col0=RS_253._col36(Inner),Output:["_col1","_col9","_col11","_col16","_col17","_col18","_col19","_col21","_col25","_col26","_col27","_col28","_col30","_col31","_col33","_col44","_col45","_col46","_col47","_col48"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1124] PartitionCols:_col0 - Select Operator [SEL_1147] (rows=1861800 width=89) + Select Operator [SEL_1121] (rows=1861800 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_1146] (rows=1861800 width=89) + Filter Operator [FIL_1120] (rows=1861800 width=89) predicate:cd_demo_sk is not null - TableScan [TS_89] (rows=1861800 width=89) + TableScan [TS_0] (rows=1861800 width=89) default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_113] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_253] PartitionCols:_col36 - Merge Join Operator [MERGEJOIN_1090] (rows=23886447 width=1240) - Conds:RS_110._col0=RS_111._col15(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col36","_col42","_col43","_col44","_col45","_col46"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_1116] (rows=23886447 width=1240) + Conds:RS_245._col0=RS_246._col15(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col36","_col42","_col43","_col44","_col45","_col46"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_245] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1080] (rows=70357394 width=458) - Conds:RS_107._col1=RS_1149._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] - <-Map 53 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1149] + Merge Join Operator [MERGEJOIN_1088] (rows=70357394 width=458) + Conds:RS_109._col1=RS_1123._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1123] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1147] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_107] + Please refer to the previous Select Operator [SEL_1121] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_109] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1079] (rows=69376329 width=376) - Conds:RS_104._col3=RS_1143._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1143] + Merge Join Operator [MERGEJOIN_1087] (rows=69376329 width=376) + Conds:RS_106._col3=RS_1156._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1156] PartitionCols:_col0 - Select Operator [SEL_1142] (rows=40000000 width=365) + Select Operator [SEL_1155] (rows=40000000 width=365) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1141] (rows=40000000 width=365) + Filter Operator [FIL_1154] (rows=40000000 width=365) predicate:ca_address_sk is not null - TableScan [TS_19] (rows=40000000 width=365) + TableScan [TS_22] (rows=40000000 width=365) default@customer_address,ad2,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_104] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_106] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_1078] (rows=69376329 width=19) - Conds:RS_101._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_101] + Merge Join Operator [MERGEJOIN_1086] (rows=69376329 width=19) + Conds:RS_103._col2=RS_104._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_103] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_1076] (rows=69376329 width=23) - Conds:RS_98._col4=RS_1122._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1122] + Merge Join Operator [MERGEJOIN_1084] (rows=69376329 width=23) + Conds:RS_100._col4=RS_1135._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1135] PartitionCols:_col0 - Select Operator [SEL_1118] (rows=73049 width=8) + Select Operator [SEL_1131] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_1115] (rows=73049 width=8) + Filter Operator [FIL_1128] (rows=73049 width=8) predicate:d_date_sk is not null - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_98] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_100] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1075] (rows=69376329 width=23) - Conds:RS_1114._col5=RS_1121._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1121] + Merge Join Operator [MERGEJOIN_1083] (rows=69376329 width=23) + Conds:RS_1127._col5=RS_1134._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1134] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1118] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1114] + Please refer to the previous Select Operator [SEL_1131] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1127] PartitionCols:_col5 - Select Operator [SEL_1113] (rows=69376329 width=23) + Select Operator [SEL_1126] (rows=69376329 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1112] (rows=69376329 width=23) + Filter Operator [FIL_1125] (rows=69376329 width=23) predicate:(c_first_shipto_date_sk is not null and c_first_sales_date_sk is not null and c_current_hdemo_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=23) + TableScan [TS_3] (rows=80000000 width=23) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 35 [SIMPLE_EDGE] - SHUFFLE [RS_102] + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_104] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1077] (rows=7200 width=4) - Conds:RS_1133._col1=RS_1138._col0(Inner),Output:["_col0"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1133] + Merge Join Operator [MERGEJOIN_1085] (rows=7200 width=4) + Conds:RS_1146._col1=RS_1151._col0(Inner),Output:["_col0"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1146] PartitionCols:_col1 - Select Operator [SEL_1132] (rows=7200 width=8) + Select Operator [SEL_1145] (rows=7200 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_1131] (rows=7200 width=8) + Filter Operator [FIL_1144] (rows=7200 width=8) predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) - TableScan [TS_9] (rows=7200 width=8) + TableScan [TS_12] (rows=7200 width=8) default@household_demographics,hd2,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1138] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1151] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1137] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_111] + Please refer to the previous Select Operator [SEL_1150] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_246] PartitionCols:_col15 - Select Operator [SEL_88] (rows=23886447 width=788) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col15","_col16","_col22","_col23","_col24","_col25","_col26"] - Merge Join Operator [MERGEJOIN_1089] (rows=23886447 width=788) - Conds:RS_85._col1, _col8=RS_1189._col0, _col1(Inner),Output:["_col2","_col3","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] - <-Map 52 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1189] - PartitionCols:_col0, _col1 - Select Operator [SEL_1188] (rows=57591150 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_1187] (rows=57591150 width=8) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_61] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_85] - PartitionCols:_col1, _col8 - Merge Join Operator [MERGEJOIN_1088] (rows=14487982 width=661) - Conds:RS_82._col5=RS_1144._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1144] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1142] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_82] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1087] (rows=14487982 width=300) - Conds:RS_79._col6=RS_1185._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21"] - <-Map 51 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1185] - PartitionCols:_col0 - Select Operator [SEL_1184] (rows=1704 width=181) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1183] (rows=1704 width=181) - predicate:(s_store_sk is not null and s_store_name is not null and s_zip is not null) - TableScan [TS_55] (rows=1704 width=181) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_79] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_1086] (rows=14487982 width=123) - Conds:RS_76._col4=RS_1134._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col18"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1134] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1132] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1085] (rows=14487982 width=119) - Conds:RS_73._col7=RS_1181._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 50 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1181] - PartitionCols:_col0 - Select Operator [SEL_1180] (rows=2300 width=4) - Output:["_col0"] - Filter Operator [FIL_1179] (rows=2300 width=4) - predicate:p_promo_sk is not null - TableScan [TS_49] (rows=2300 width=4) - default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1084] (rows=14487982 width=119) - Conds:RS_70._col1=RS_1178._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1082] (rows=14487982 width=119) - Conds:RS_67._col0=RS_1123._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1123] - PartitionCols:_col0 - Select Operator [SEL_1119] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_1116] (rows=652 width=8) - predicate:((d_year = 2000) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 39 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1081] (rows=40575792 width=314) - Conds:RS_1155._col1=RS_1158._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 40 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1158] - PartitionCols:_col0 - Select Operator [SEL_1157] (rows=4667 width=111) - Output:["_col0","_col1"] - Filter Operator [FIL_1156] (rows=4667 width=311) - predicate:(i_current_price BETWEEN 36 AND 45 and (i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_item_sk is not null) - TableScan [TS_28] (rows=462000 width=311) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1155] - PartitionCols:_col1 - Select Operator [SEL_1154] (rows=417313408 width=355) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1153] (rows=417313408 width=355) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_68_d1_d_date_sk_min) AND DynamicValue(RS_68_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_d1_d_date_sk_bloom_filter))) - TableScan [TS_25] (rows=575995635 width=355) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1152] - Group By Operator [GBY_1151] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1129] - Group By Operator [GBY_1127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1124] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1119] - <-Reducer 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1178] - PartitionCols:_col0 - Select Operator [SEL_1177] (rows=13257 width=4) - Output:["_col0"] - Filter Operator [FIL_1176] (rows=13257 width=228) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1175] (rows=39773 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 45 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0 - Group By Operator [GBY_44] (rows=6482999 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_1083] (rows=183085709 width=227) - Conds:RS_1170._col0, _col1=RS_1173._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1173] - PartitionCols:_col0, _col1 - Select Operator [SEL_1172] (rows=28798881 width=120) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1171] (rows=28798881 width=337) - predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_37] (rows=28798881 width=337) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 44 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1170] - PartitionCols:_col0, _col1 - Select Operator [SEL_1169] (rows=287989836 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1168] (rows=287989836 width=119) - predicate:(cs_item_sk is not null and cs_order_number is not null and cs_item_sk BETWEEN DynamicValue(RS_65_item_i_item_sk_min) AND DynamicValue(RS_65_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_65_item_i_item_sk_bloom_filter))) - TableScan [TS_34] (rows=287989836 width=119) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 41 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1167] - Group By Operator [GBY_1166] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1164] - Group By Operator [GBY_1162] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1159] (rows=4667 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1157] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1214] - PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1213] (rows=23886447 width=525) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_1212] (rows=23886447 width=1362) + Merge Join Operator [MERGEJOIN_1115] (rows=23886447 width=788) + Conds:RS_1198._col0, _col1=RS_227._col12, _col19(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col15","_col16","_col22","_col23","_col24","_col25","_col26"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1198] + PartitionCols:_col0, _col1 + Select Operator [SEL_1196] (rows=57591150 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_1195] (rows=57591150 width=8) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_28] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_227] + PartitionCols:_col12, _col19 + Merge Join Operator [MERGEJOIN_1114] (rows=14487982 width=661) + Conds:RS_1158._col0=RS_223._col11(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col12","_col13","_col14","_col19","_col20","_col21","_col22","_col23","_col24"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1158] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1155] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_223] + PartitionCols:_col11 + Merge Join Operator [MERGEJOIN_1113] (rows=14487982 width=300) + Conds:RS_1194._col0=RS_219._col9(Inner),Output:["_col1","_col2","_col4","_col7","_col8","_col9","_col11","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1194] + PartitionCols:_col0 + Select Operator [SEL_1192] (rows=1704 width=181) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1191] (rows=1704 width=181) + predicate:(s_store_sk is not null and s_store_name is not null and s_zip is not null) + TableScan [TS_34] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_219] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_1112] (rows=14487982 width=123) + Conds:RS_1148._col0=RS_215._col5(Inner),Output:["_col1","_col4","_col5","_col6","_col8","_col9","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1148] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1145] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_215] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1111] (rows=14487982 width=119) + Conds:RS_1190._col0=RS_211._col7(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1190] + PartitionCols:_col0 + Select Operator [SEL_1188] (rows=2300 width=4) + Output:["_col0"] + Filter Operator [FIL_1187] (rows=2300 width=4) + predicate:p_promo_sk is not null + TableScan [TS_40] (rows=2300 width=4) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_211] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_1110] (rows=14487982 width=119) + Conds:RS_206._col1=RS_1217._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_206] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1108] (rows=14487982 width=119) + Conds:RS_203._col0=RS_1138._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1138] + PartitionCols:_col0 + Select Operator [SEL_1133] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_1130] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_203] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1107] (rows=40575792 width=314) + Conds:RS_1208._col1=RS_1168._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 44 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1168] + PartitionCols:_col0 + Select Operator [SEL_1165] (rows=4667 width=111) + Output:["_col0","_col1"] + Filter Operator [FIL_1164] (rows=4667 width=311) + predicate:(i_current_price BETWEEN 36 AND 45 and (i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_item_sk is not null) + TableScan [TS_46] (rows=462000 width=311) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1208] + PartitionCols:_col1 + Select Operator [SEL_1207] (rows=417313408 width=355) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Filter Operator [FIL_1206] (rows=417313408 width=355) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_204_d1_d_date_sk_min) AND DynamicValue(RS_204_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_204_d1_d_date_sk_bloom_filter))) + TableScan [TS_176] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1205] + Group By Operator [GBY_1204] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1143] + Group By Operator [GBY_1141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1139] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1133] + <-Reducer 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1217] + PartitionCols:_col0 + Select Operator [SEL_1216] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_1215] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1214] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 52 [SIMPLE_EDGE] + SHUFFLE [RS_196] + PartitionCols:_col0 + Group By Operator [GBY_195] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_1109] (rows=183085709 width=227) + Conds:RS_1213._col0, _col1=RS_1182._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1182] + PartitionCols:_col0, _col1 + Select Operator [SEL_1180] (rows=28798881 width=120) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1179] (rows=28798881 width=337) + predicate:(cr_item_sk is not null and cr_order_number is not null) + TableScan [TS_55] (rows=28798881 width=337) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 55 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1213] + PartitionCols:_col0, _col1 + Select Operator [SEL_1212] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1211] (rows=287989836 width=119) + predicate:(cs_item_sk is not null and cs_order_number is not null and cs_item_sk BETWEEN DynamicValue(RS_201_item_i_item_sk_min) AND DynamicValue(RS_201_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_201_item_i_item_sk_bloom_filter))) + TableScan [TS_185] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 47 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1210] + Group By Operator [GBY_1209] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1173] + Group By Operator [GBY_1171] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1169] (rows=4667 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1165] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1203] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_1202] (rows=23886447 width=1354) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Filter Operator [FIL_1201] (rows=23886447 width=1362) predicate:_col14 is not null - Select Operator [SEL_1211] (rows=23886447 width=1362) - Output:["_col1","_col2","_col3","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_1210] (rows=23886447 width=1362) + Select Operator [SEL_1200] (rows=23886447 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col14","_col15","_col16","_col17"] + Group By Operator [GBY_1199] (rows=23886447 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_249] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_128] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_248] (rows=23886447 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col42)","sum(_col43)","sum(_col44)"],keys:_col28, _col45, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col46 - Merge Join Operator [MERGEJOIN_1110] (rows=24224230 width=1153) - Conds:RS_244._col31=RS_1140._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col42","_col43","_col44","_col45","_col46"] - <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1140] + Group By Operator [GBY_127] (rows=23886447 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col44)","sum(_col45)","sum(_col46)"],keys:_col30, _col47, _col31, _col9, _col11, _col16, _col17, _col18, _col19, _col25, _col26, _col27, _col28, _col48 + Merge Join Operator [MERGEJOIN_1100] (rows=24224230 width=1153) + Conds:RS_123._col33=RS_1152._col0(Inner),Output:["_col9","_col11","_col16","_col17","_col18","_col19","_col25","_col26","_col27","_col28","_col30","_col31","_col44","_col45","_col46","_col47","_col48"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1152] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1137] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_244] - PartitionCols:_col31 - Filter Operator [FIL_243] (rows=24224230 width=1327) - predicate:(_col50 <> _col19) - Merge Join Operator [MERGEJOIN_1109] (rows=24224230 width=1327) - Conds:RS_240._col36=RS_1150._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col42","_col43","_col44","_col45","_col46","_col50"] - <-Map 53 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1150] + Please refer to the previous Select Operator [SEL_1150] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_123] + PartitionCols:_col33 + Filter Operator [FIL_122] (rows=24224230 width=1327) + predicate:(_col1 <> _col21) + Merge Join Operator [MERGEJOIN_1099] (rows=24224230 width=1327) + Conds:RS_1122._col0=RS_120._col36(Inner),Output:["_col1","_col9","_col11","_col16","_col17","_col18","_col19","_col21","_col25","_col26","_col27","_col28","_col30","_col31","_col33","_col44","_col45","_col46","_col47","_col48"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1122] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1147] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_240] + Please refer to the previous Select Operator [SEL_1121] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_120] PartitionCols:_col36 - Merge Join Operator [MERGEJOIN_1108] (rows=23886447 width=1240) - Conds:RS_237._col0=RS_238._col15(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col36","_col42","_col43","_col44","_col45","_col46"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_237] + Merge Join Operator [MERGEJOIN_1098] (rows=23886447 width=1240) + Conds:RS_112._col0=RS_113._col15(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col36","_col42","_col43","_col44","_col45","_col46"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_112] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1080] - <-Reducer 32 [SIMPLE_EDGE] - SHUFFLE [RS_238] + Please refer to the previous Merge Join Operator [MERGEJOIN_1088] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_113] PartitionCols:_col15 - Select Operator [SEL_215] (rows=23886447 width=788) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col15","_col16","_col22","_col23","_col24","_col25","_col26"] - Merge Join Operator [MERGEJOIN_1107] (rows=23886447 width=788) - Conds:RS_212._col1, _col8=RS_1190._col0, _col1(Inner),Output:["_col2","_col3","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] - <-Map 52 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1190] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1188] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_212] - PartitionCols:_col1, _col8 - Merge Join Operator [MERGEJOIN_1106] (rows=14487982 width=661) - Conds:RS_209._col5=RS_1145._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1145] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1142] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_209] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1105] (rows=14487982 width=300) - Conds:RS_206._col6=RS_1186._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21"] - <-Map 51 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1186] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1184] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_206] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_1104] (rows=14487982 width=123) - Conds:RS_203._col4=RS_1135._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col18"] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1135] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1132] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_203] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1103] (rows=14487982 width=119) - Conds:RS_200._col7=RS_1182._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 50 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1182] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1180] - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_200] - PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1102] (rows=14487982 width=119) - Conds:RS_197._col1=RS_1209._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_197] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1100] (rows=14487982 width=119) - Conds:RS_194._col0=RS_1125._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1125] - PartitionCols:_col0 - Select Operator [SEL_1120] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_1117] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_194] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1099] (rows=40575792 width=314) - Conds:RS_1200._col1=RS_1160._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 40 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1160] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1157] - <-Map 54 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1200] - PartitionCols:_col1 - Select Operator [SEL_1199] (rows=417313408 width=355) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1198] (rows=417313408 width=355) - predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_195_d1_d_date_sk_min) AND DynamicValue(RS_195_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_195_d1_d_date_sk_bloom_filter))) - TableScan [TS_152] (rows=575995635 width=355) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1197] - Group By Operator [GBY_1196] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1130] - Group By Operator [GBY_1128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1126] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1120] - <-Reducer 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1209] - PartitionCols:_col0 - Select Operator [SEL_1208] (rows=13257 width=4) - Output:["_col0"] - Filter Operator [FIL_1207] (rows=13257 width=228) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1206] (rows=39773 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_172] - PartitionCols:_col0 - Group By Operator [GBY_171] (rows=6482999 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_1101] (rows=183085709 width=227) - Conds:RS_1205._col0, _col1=RS_1174._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1174] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1172] - <-Map 55 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1205] - PartitionCols:_col0, _col1 - Select Operator [SEL_1204] (rows=287989836 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1203] (rows=287989836 width=119) - predicate:(cs_item_sk is not null and cs_order_number is not null and cs_item_sk BETWEEN DynamicValue(RS_192_item_i_item_sk_min) AND DynamicValue(RS_192_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_192_item_i_item_sk_bloom_filter))) - TableScan [TS_161] (rows=287989836 width=119) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1202] - Group By Operator [GBY_1201] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1165] - Group By Operator [GBY_1163] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1161] (rows=4667 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1157] + Merge Join Operator [MERGEJOIN_1097] (rows=23886447 width=788) + Conds:RS_1197._col0, _col1=RS_94._col12, _col19(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col15","_col16","_col22","_col23","_col24","_col25","_col26"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1197] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1196] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col12, _col19 + Merge Join Operator [MERGEJOIN_1096] (rows=14487982 width=661) + Conds:RS_1157._col0=RS_90._col11(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col12","_col13","_col14","_col19","_col20","_col21","_col22","_col23","_col24"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1157] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1155] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col11 + Merge Join Operator [MERGEJOIN_1095] (rows=14487982 width=300) + Conds:RS_1193._col0=RS_86._col9(Inner),Output:["_col1","_col2","_col4","_col7","_col8","_col9","_col11","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1193] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1192] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_86] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_1094] (rows=14487982 width=123) + Conds:RS_1147._col0=RS_82._col5(Inner),Output:["_col1","_col4","_col5","_col6","_col8","_col9","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1147] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1145] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1093] (rows=14487982 width=119) + Conds:RS_1189._col0=RS_78._col7(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1189] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1188] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col7 + Merge Join Operator [MERGEJOIN_1092] (rows=14487982 width=119) + Conds:RS_73._col1=RS_1186._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1090] (rows=14487982 width=119) + Conds:RS_70._col0=RS_1136._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1136] + PartitionCols:_col0 + Select Operator [SEL_1132] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_1129] (rows=652 width=8) + predicate:((d_year = 2000) and d_date_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1089] (rows=40575792 width=314) + Conds:RS_1163._col1=RS_1166._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 44 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1166] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1165] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1163] + PartitionCols:_col1 + Select Operator [SEL_1162] (rows=417313408 width=355) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Filter Operator [FIL_1161] (rows=417313408 width=355) + predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_71_d1_d_date_sk_min) AND DynamicValue(RS_71_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_71_d1_d_date_sk_bloom_filter))) + TableScan [TS_43] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1160] + Group By Operator [GBY_1159] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1142] + Group By Operator [GBY_1140] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1137] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1132] + <-Reducer 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1186] + PartitionCols:_col0 + Select Operator [SEL_1185] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_1184] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1183] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 49 [SIMPLE_EDGE] + SHUFFLE [RS_63] + PartitionCols:_col0 + Group By Operator [GBY_62] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_1091] (rows=183085709 width=227) + Conds:RS_1178._col0, _col1=RS_1181._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1181] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1180] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1178] + PartitionCols:_col0, _col1 + Select Operator [SEL_1177] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1176] (rows=287989836 width=119) + predicate:(cs_item_sk is not null and cs_order_number is not null and cs_item_sk BETWEEN DynamicValue(RS_68_item_i_item_sk_min) AND DynamicValue(RS_68_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_68_item_i_item_sk_bloom_filter))) + TableScan [TS_52] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1175] + Group By Operator [GBY_1174] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1172] + Group By Operator [GBY_1170] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1167] (rows=4667 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1165] diff --git a/ql/src/test/results/clientpositive/perf/tez/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/query65.q.out index 2571ddfe65..836eb566d7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query65.q.out @@ -67,138 +67,140 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Map 6 <- Reducer 13 (BROADCAST_EDGE) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_173] - Limit [LIM_172] (rows=100 width=702) + Reducer 3 vectorized + File Output Operator [FS_175] + Limit [LIM_174] (rows=100 width=702) Number of rows:100 - Select Operator [SEL_171] (rows=65392 width=700) + Select Operator [SEL_173] (rows=65392 width=700) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_53] - Select Operator [SEL_52] (rows=65392 width=700) + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_55] + Select Operator [SEL_54] (rows=65392 width=700) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_82] (rows=65392 width=704) - keys:_col6, _col8,top n:100 - Merge Join Operator [MERGEJOIN_140] (rows=65392 width=704) - Conds:RS_49._col1=RS_170._col0(Inner),Output:["_col2","_col6","_col8","_col9","_col10","_col11"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + Top N Key Operator [TNK_84] (rows=65392 width=704) + keys:_col6, _col1,top n:100 + Merge Join Operator [MERGEJOIN_142] (rows=65392 width=704) + Conds:RS_145._col0=RS_52._col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_145] PartitionCols:_col0 - Select Operator [SEL_169] (rows=462000 width=511) + Select Operator [SEL_144] (rows=462000 width=511) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_168] (rows=462000 width=511) + Filter Operator [FIL_143] (rows=462000 width=511) predicate:i_item_sk is not null - TableScan [TS_39] (rows=462000 width=511) + TableScan [TS_0] (rows=462000 width=511) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc","i_current_price","i_wholesale_cost","i_brand"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_139] (rows=65392 width=204) - Conds:RS_46._col0=RS_167._col0(Inner),Output:["_col1","_col2","_col6"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_52] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_141] (rows=65392 width=204) + Conds:RS_148._col0=RS_48._col0(Inner),Output:["_col1","_col3","_col4"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_166] (rows=1704 width=92) + Select Operator [SEL_147] (rows=1704 width=92) Output:["_col0","_col1"] - Filter Operator [FIL_165] (rows=1704 width=92) + Filter Operator [FIL_146] (rows=1704 width=92) predicate:s_store_sk is not null - TableScan [TS_36] (rows=1704 width=92) + TableScan [TS_3] (rows=1704 width=92) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_46] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_48] PartitionCols:_col0 - Filter Operator [FIL_45] (rows=65392 width=231) - predicate:(_col2 <= _col4) - Merge Join Operator [MERGEJOIN_138] (rows=196176 width=231) - Conds:RS_158._col0=RS_164._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_158] - PartitionCols:_col0 - Filter Operator [FIL_157] (rows=184637 width=118) - predicate:_col2 is not null - Group By Operator [GBY_156] (rows=184637 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_11] - PartitionCols:_col0, _col1 - Group By Operator [GBY_10] (rows=6093021 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_136] (rows=91197860 width=89) - Conds:RS_154._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - PartitionCols:_col0 - Select Operator [SEL_142] (rows=317 width=4) - Output:["_col0"] - Filter Operator [FIL_141] (rows=317 width=8) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] - PartitionCols:_col0 - Select Operator [SEL_152] (rows=525329897 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_150] (rows=525329897 width=118) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_149] - Group By Operator [GBY_148] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] - Group By Operator [GBY_146] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_144] (rows=317 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_142] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] - PartitionCols:_col0 - Select Operator [SEL_163] (rows=17 width=115) - Output:["_col0","_col1"] - Filter Operator [FIL_162] (rows=17 width=123) - predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_161] (rows=17 width=123) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_160] (rows=184637 width=118) - Output:["_col1","_col2"] - Group By Operator [GBY_159] (rows=184637 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Group By Operator [GBY_26] (rows=6093021 width=118) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_137] (rows=91197860 width=89) - Conds:RS_155._col0=RS_145._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_142] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_155] - PartitionCols:_col0 - Select Operator [SEL_153] (rows=525329897 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_151] (rows=525329897 width=118) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_46] (rows=65392 width=231) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_45] (rows=65392 width=231) + predicate:(_col2 <= _col4) + Merge Join Operator [MERGEJOIN_140] (rows=196176 width=231) + Conds:RS_166._col0=RS_172._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_172] + PartitionCols:_col0 + Select Operator [SEL_171] (rows=17 width=115) + Output:["_col0","_col1"] + Filter Operator [FIL_170] (rows=17 width=123) + predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null + Group By Operator [GBY_169] (rows=17 width=123) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Select Operator [SEL_168] (rows=184637 width=118) + Output:["_col1","_col2"] + Group By Operator [GBY_167] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Group By Operator [GBY_32] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_139] (rows=91197860 width=89) + Conds:RS_163._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] + PartitionCols:_col0 + Select Operator [SEL_150] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_149] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_9] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_163] + PartitionCols:_col0 + Select Operator [SEL_161] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_159] (rows=525329897 width=118) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_6] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_157] + Group By Operator [GBY_156] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_155] + Group By Operator [GBY_154] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_152] (rows=317 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_150] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_166] + PartitionCols:_col0 + Filter Operator [FIL_165] (rows=184637 width=118) + predicate:_col2 is not null + Group By Operator [GBY_164] (rows=184637 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0, _col1 + Group By Operator [GBY_16] (rows=6093021 width=118) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 + Merge Join Operator [MERGEJOIN_138] (rows=91197860 width=89) + Conds:RS_162._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_150] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_162] + PartitionCols:_col0 + Select Operator [SEL_160] (rows=525329897 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_158] (rows=525329897 width=118) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + Please refer to the previous TableScan [TS_6] diff --git a/ql/src/test/results/clientpositive/perf/tez/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/query66.q.out index 8193c2dfcb..91a5d705a6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -457,200 +457,200 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 18 (BROADCAST_EDGE) -Map 21 <- Reducer 19 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 17 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 20 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 17 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Map 21 <- Reducer 20 (BROADCAST_EDGE) +Map 9 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 17 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 18 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 17 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 18 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_259] - Select Operator [SEL_258] (rows=100 width=4614) + Reducer 6 vectorized + File Output Operator [FS_260] + Select Operator [SEL_259] (rows=100 width=4614) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"] - Limit [LIM_257] (rows=100 width=4510) + Limit [LIM_258] (rows=100 width=4510) Number of rows:100 - Select Operator [SEL_256] (rows=2423925 width=4510) + Select Operator [SEL_257] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_255] - Group By Operator [GBY_254] (rows=2423925 width=4510) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_256] + Group By Operator [GBY_255] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] vectorized - Reduce Output Operator [RS_269] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_254] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_268] (rows=2513727 width=4510) + Group By Operator [GBY_253] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_267] (rows=2513727 width=3166) + Select Operator [SEL_252] (rows=2513727 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Top N Key Operator [TNK_266] (rows=2513727 width=3166) + Top N Key Operator [TNK_251] (rows=2513727 width=3166) keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Group By Operator [GBY_265] (rows=2513700 width=3166) + Group By Operator [GBY_250] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_63] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_62] (rows=12905590 width=3166) + Group By Operator [GBY_30] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_60] (rows=12905590 width=750) + Select Operator [SEL_28] (rows=6624114 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_209] (rows=12905590 width=750) - Conds:RS_57._col3=RS_248._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + Merge Join Operator [MERGEJOIN_206] (rows=6624114 width=750) + Conds:RS_225._col0=RS_26._col3(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_246] (rows=27 width=482) + Select Operator [SEL_224] (rows=27 width=482) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_245] (rows=27 width=482) + Filter Operator [FIL_223] (rows=27 width=482) predicate:w_warehouse_sk is not null - TableScan [TS_12] (rows=27 width=482) + TableScan [TS_0] (rows=27 width=482) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_57] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_208] (rows=12905590 width=275) - Conds:RS_54._col2=RS_226._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] + Merge Join Operator [MERGEJOIN_205] (rows=6624114 width=275) + Conds:RS_21._col2=RS_229._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_223] (rows=1 width=4) + Select Operator [SEL_228] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_222] (rows=1 width=88) + Filter Operator [FIL_227] (rows=1 width=88) predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) - TableScan [TS_9] (rows=1 width=88) + TableScan [TS_12] (rows=1 width=88) default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_54] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_207] (rows=38716771 width=279) - Conds:RS_51._col0=RS_244._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] + Merge Join Operator [MERGEJOIN_204] (rows=19872342 width=279) + Conds:RS_18._col0=RS_248._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] PartitionCols:_col0 - Select Operator [SEL_242] (rows=652 width=52) + Select Operator [SEL_247] (rows=652 width=52) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_241] (rows=652 width=12) + Filter Operator [FIL_246] (rows=652 width=12) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=12) + TableScan [TS_9] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_51] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_206] (rows=109204159 width=235) - Conds:RS_264._col1=RS_240._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_240] + Merge Join Operator [MERGEJOIN_203] (rows=55655511 width=235) + Conds:RS_241._col1=RS_244._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_238] (rows=33426 width=4) + Select Operator [SEL_243] (rows=33426 width=4) Output:["_col0"] - Filter Operator [FIL_237] (rows=33426 width=8) + Filter Operator [FIL_242] (rows=33426 width=8) predicate:(t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) - TableScan [TS_3] (rows=86400 width=8) + TableScan [TS_6] (rows=86400 width=8) default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_264] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] PartitionCols:_col1 - Select Operator [SEL_263] (rows=282272460 width=239) + Select Operator [SEL_240] (rows=143859154 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_262] (rows=282272460 width=243) - predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_bloom_filter))) - TableScan [TS_33] (rows=287989836 width=243) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + Filter Operator [FIL_239] (rows=143859154 width=243) + predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) + TableScan [TS_3] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - Group By Operator [GBY_229] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_235] + Group By Operator [GBY_233] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_227] (rows=1 width=4) + Select Operator [SEL_230] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_223] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_253] + Please refer to the previous Select Operator [SEL_228] + <-Reducer 8 [CONTAINS] vectorized + Reduce Output Operator [RS_270] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_252] (rows=2513727 width=4510) + Group By Operator [GBY_269] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_251] (rows=2513727 width=3166) + Select Operator [SEL_268] (rows=2513727 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Top N Key Operator [TNK_250] (rows=2513727 width=3166) + Top N Key Operator [TNK_267] (rows=2513727 width=3166) keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Group By Operator [GBY_249] (rows=27 width=3166) + Group By Operator [GBY_266] (rows=2513700 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_29] (rows=27 width=3166) + Group By Operator [GBY_63] (rows=12905590 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_27] (rows=6624114 width=750) + Select Operator [SEL_61] (rows=12905590 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_205] (rows=6624114 width=750) - Conds:RS_24._col3=RS_247._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] + Merge Join Operator [MERGEJOIN_210] (rows=12905590 width=750) + Conds:RS_58._col3=RS_226._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_226] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_246] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + Please refer to the previous Select Operator [SEL_224] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_58] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_204] (rows=6624114 width=275) - Conds:RS_21._col2=RS_224._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] + Merge Join Operator [MERGEJOIN_209] (rows=12905590 width=275) + Conds:RS_55._col2=RS_231._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_231] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_223] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] + Please refer to the previous Select Operator [SEL_228] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_203] (rows=19872342 width=279) - Conds:RS_18._col0=RS_243._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] + Merge Join Operator [MERGEJOIN_208] (rows=38716771 width=279) + Conds:RS_52._col0=RS_249._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_242] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] + Please refer to the previous Select Operator [SEL_247] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_52] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_202] (rows=55655511 width=235) - Conds:RS_236._col1=RS_239._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + Merge Join Operator [MERGEJOIN_207] (rows=109204159 width=235) + Conds:RS_265._col1=RS_245._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] + Please refer to the previous Select Operator [SEL_243] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_265] PartitionCols:_col1 - Select Operator [SEL_235] (rows=143859154 width=239) + Select Operator [SEL_264] (rows=282272460 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_234] (rows=143859154 width=243) - predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=243) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_233] - Group By Operator [GBY_232] (rows=1 width=12) + Filter Operator [FIL_263] (rows=282272460 width=243) + predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_56_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_56_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_56_ship_mode_sm_ship_mode_sk_bloom_filter))) + TableScan [TS_34] (rows=287989836 width=243) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_262] + Group By Operator [GBY_261] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] - Group By Operator [GBY_228] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_236] + Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_225] (rows=1 width=4) + Select Operator [SEL_232] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_223] + Please refer to the previous Select Operator [SEL_228] diff --git a/ql/src/test/results/clientpositive/perf/tez/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/query67.q.out index 9c950c3b37..a46f07f0ab 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query67.q.out @@ -97,108 +97,108 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Map 6 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_115] - Limit [LIM_114] (rows=100 width=617) + Reducer 5 vectorized + File Output Operator [FS_116] + Limit [LIM_115] (rows=100 width=617) Number of rows:100 - Select Operator [SEL_113] (rows=273593580 width=617) + Select Operator [SEL_114] (rows=273593580 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Select Operator [SEL_111] (rows=273593580 width=617) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + Select Operator [SEL_112] (rows=273593580 width=617) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_110] (rows=273593580 width=613) + Top N Key Operator [TNK_111] (rows=273593580 width=613) keys:_col2, _col1, _col0, _col3, _col4, _col6, _col5, _col7, _col8, rank_window_0,top n:100 - Filter Operator [FIL_109] (rows=273593580 width=613) + Filter Operator [FIL_110] (rows=273593580 width=613) predicate:(rank_window_0 <= 100) - PTF Operator [PTF_108] (rows=820780740 width=613) + PTF Operator [PTF_109] (rows=820780740 width=613) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col8 DESC NULLS LAST","partition by:":"_col2"}] - Select Operator [SEL_107] (rows=820780740 width=613) + Select Operator [SEL_108] (rows=820780740 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] PartitionCols:_col2 - Select Operator [SEL_105] (rows=820780740 width=613) + Select Operator [SEL_106] (rows=820780740 width=613) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Top N Key Operator [TNK_104] (rows=820780740 width=621) + Top N Key Operator [TNK_105] (rows=820780740 width=621) PartitionCols:_col2,keys:_col2, _col9,top n:101 - Group By Operator [GBY_103] (rows=820780740 width=621) + Group By Operator [GBY_104] (rows=820780740 width=621) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Group By Operator [GBY_22] (rows=820780740 width=621) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col11, _col12, _col13, _col14, _col5, _col6, _col7, _col9, 0L - Merge Join Operator [MERGEJOIN_85] (rows=91197860 width=613) - Conds:RS_18._col1=RS_102._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col14"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] + Group By Operator [GBY_23] (rows=820780740 width=621) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col8)"],keys:_col1, _col2, _col3, _col4, _col10, _col11, _col12, _col14, 0L + Merge Join Operator [MERGEJOIN_86] (rows=91197860 width=613) + Conds:RS_89._col0=RS_20._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col10","_col11","_col12","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_89] PartitionCols:_col0 - Select Operator [SEL_101] (rows=462000 width=393) + Select Operator [SEL_88] (rows=462000 width=393) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_100] (rows=462000 width=393) + Filter Operator [FIL_87] (rows=462000 width=393) predicate:i_item_sk is not null - TableScan [TS_9] (rows=462000 width=393) + TableScan [TS_0] (rows=462000 width=393) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_84] (rows=91197860 width=228) - Conds:RS_15._col2=RS_99._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_99] + Merge Join Operator [MERGEJOIN_85] (rows=91197860 width=228) + Conds:RS_15._col2=RS_103._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col9"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_98] (rows=1704 width=104) + Select Operator [SEL_102] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_97] (rows=1704 width=104) + Filter Operator [FIL_101] (rows=1704 width=104) predicate:s_store_sk is not null - TableScan [TS_6] (rows=1704 width=104) + TableScan [TS_9] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_83] (rows=91197860 width=130) - Conds:RS_96._col0=RS_88._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_88] + Merge Join Operator [MERGEJOIN_84] (rows=91197860 width=130) + Conds:RS_100._col0=RS_92._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_87] (rows=317 width=16) + Select Operator [SEL_91] (rows=317 width=16) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_86] (rows=317 width=20) + Filter Operator [FIL_90] (rows=317 width=20) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=20) + TableScan [TS_6] (rows=73049 width=20) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_100] PartitionCols:_col0 - Select Operator [SEL_95] (rows=525329897 width=123) + Select Operator [SEL_99] (rows=525329897 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_94] (rows=525329897 width=122) + Filter Operator [FIL_98] (rows=525329897 width=122) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=122) + TableScan [TS_3] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_quantity","ss_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_93] - Group By Operator [GBY_92] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_97] + Group By Operator [GBY_96] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_91] - Group By Operator [GBY_90] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_95] + Group By Operator [GBY_94] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_89] (rows=317 width=4) + Select Operator [SEL_93] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_87] + Please refer to the previous Select Operator [SEL_91] diff --git a/ql/src/test/results/clientpositive/perf/tez/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/query68.q.out index 6426f59a90..8def1bb618 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query68.q.out @@ -113,122 +113,122 @@ Stage-0 limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_176] - Limit [LIM_175] (rows=100 width=706) + File Output Operator [FS_177] + Limit [LIM_176] (rows=100 width=706) Number of rows:100 - Select Operator [SEL_174] (rows=727776 width=706) + Select Operator [SEL_175] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_46] - Select Operator [SEL_45] (rows=727776 width=706) + SHUFFLE [RS_47] + Select Operator [SEL_46] (rows=727776 width=706) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_80] (rows=727776 width=706) + Top N Key Operator [TNK_81] (rows=727776 width=706) keys:_col3, _col6,top n:100 - Filter Operator [FIL_44] (rows=727776 width=706) + Filter Operator [FIL_45] (rows=727776 width=706) predicate:(_col5 <> _col8) - Merge Join Operator [MERGEJOIN_146] (rows=727776 width=706) - Conds:RS_41._col0=RS_173._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_147] (rows=727776 width=706) + Conds:RS_42._col0=RS_174._col1(Inner),Output:["_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_41] + SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_141] (rows=80000000 width=277) - Conds:RS_149._col1=RS_152._col0(Inner),Output:["_col0","_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_142] (rows=80000000 width=277) + Conds:RS_150._col1=RS_153._col0(Inner),Output:["_col0","_col2","_col3","_col5"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_151] (rows=40000000 width=97) + Select Operator [SEL_152] (rows=40000000 width=97) Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=40000000 width=97) + Filter Operator [FIL_151] (rows=40000000 width=97) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=97) default@customer_address,current_addr,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_city"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col1 - Select Operator [SEL_148] (rows=80000000 width=188) + Select Operator [SEL_149] (rows=80000000 width=188) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_147] (rows=80000000 width=188) + Filter Operator [FIL_148] (rows=80000000 width=188) predicate:(c_customer_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=188) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] + SHUFFLE [RS_174] PartitionCols:_col1 - Select Operator [SEL_172] (rows=727776 width=433) + Select Operator [SEL_173] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_171] (rows=727776 width=433) + Group By Operator [GBY_172] (rows=727776 width=433) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_35] + SHUFFLE [RS_36] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_34] (rows=727776 width=433) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col1, _col13, _col3, _col5 - Merge Join Operator [MERGEJOIN_145] (rows=727776 width=97) - Conds:RS_30._col3=RS_153._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col13"] + Group By Operator [GBY_35] (rows=727776 width=433) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col8)","sum(_col9)","sum(_col10)"],keys:_col3, _col1, _col5, _col7 + Merge Join Operator [MERGEJOIN_146] (rows=727776 width=97) + Conds:RS_154._col0=RS_32._col3(Inner),Output:["_col1","_col3","_col5","_col7","_col8","_col9","_col10"] <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_154] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_151] + Please refer to the previous Select Operator [SEL_152] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_30] + SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_144] (rows=727776 width=4) - Conds:RS_27._col2=RS_170._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_145] (rows=727776 width=4) + Conds:RS_27._col2=RS_171._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + SHUFFLE [RS_171] PartitionCols:_col0 - Select Operator [SEL_169] (rows=1855 width=4) + Select Operator [SEL_170] (rows=1855 width=4) Output:["_col0"] - Filter Operator [FIL_168] (rows=1855 width=12) + Filter Operator [FIL_169] (rows=1855 width=12) predicate:(((hd_dep_count = 2) or (hd_vehicle_count = 1)) and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=12) + TableScan [TS_18] (rows=7200 width=12) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_143] (rows=2824787 width=4) - Conds:RS_24._col4=RS_167._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_144] (rows=2824787 width=4) + Conds:RS_24._col4=RS_168._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_168] PartitionCols:_col0 - Select Operator [SEL_166] (rows=14 width=4) + Select Operator [SEL_167] (rows=14 width=4) Output:["_col0"] - Filter Operator [FIL_165] (rows=14 width=97) + Filter Operator [FIL_166] (rows=14 width=97) predicate:((s_city) IN ('Cedar Grove', 'Wildwood') and s_store_sk is not null) - TableScan [TS_12] (rows=1704 width=97) + TableScan [TS_15] (rows=1704 width=97) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_city"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_142] (rows=42598570 width=185) - Conds:RS_164._col0=RS_156._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_143] (rows=42598570 width=185) + Conds:RS_165._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_155] (rows=170 width=4) + Select Operator [SEL_156] (rows=170 width=4) Output:["_col0"] - Filter Operator [FIL_154] (rows=170 width=12) + Filter Operator [FIL_155] (rows=170 width=12) predicate:(d_dom BETWEEN 1 AND 2 and (d_year) IN (1998, 1999, 2000) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=12) + TableScan [TS_12] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_163] (rows=457565061 width=343) + Select Operator [SEL_164] (rows=457565061 width=343) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_162] (rows=457565061 width=343) + Filter Operator [FIL_163] (rows=457565061 width=343) predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=343) + TableScan [TS_9] (rows=575995635 width=343) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_ticket_number","ss_ext_sales_price","ss_ext_list_price","ss_ext_tax"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) + BROADCAST [RS_162] + Group By Operator [GBY_161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) + SHUFFLE [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_157] (rows=170 width=4) + Select Operator [SEL_158] (rows=170 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_155] + Please refer to the previous Select Operator [SEL_156] diff --git a/ql/src/test/results/clientpositive/perf/tez/query7.q.out b/ql/src/test/results/clientpositive/perf/tez/query7.q.out index cc896fa9fa..8c132c7809 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query7.q.out @@ -53,109 +53,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Map 11 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 12 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_130] - Limit [LIM_129] (rows=100 width=444) + Reducer 4 vectorized + File Output Operator [FS_131] + Limit [LIM_130] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_128] (rows=310774 width=444) + Select Operator [SEL_129] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_127] - Select Operator [SEL_126] (rows=310774 width=444) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_125] (rows=310774 width=476) + Group By Operator [GBY_126] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_28] (rows=462000 width=476) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_58] (rows=1441769 width=100) - keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_104] (rows=1441769 width=100) - Conds:RS_24._col1=RS_124._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_124] + Group By Operator [GBY_29] (rows=462000 width=476) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col9)","count(_col9)","sum(_col8)","count(_col8)"],keys:_col1 + Top N Key Operator [TNK_59] (rows=1441769 width=100) + keys:_col1,top n:100 + Merge Join Operator [MERGEJOIN_105] (rows=1441769 width=100) + Conds:RS_108._col0=RS_26._col1(Inner),Output:["_col1","_col6","_col7","_col8","_col9"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_123] (rows=462000 width=104) + Select Operator [SEL_107] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=462000 width=104) + Filter Operator [FIL_106] (rows=462000 width=104) predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=104) + TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_103] (rows=1441769 width=4) - Conds:RS_21._col3=RS_121._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] + Merge Join Operator [MERGEJOIN_104] (rows=1441769 width=4) + Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_120] (rows=2300 width=4) + Select Operator [SEL_124] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_119] (rows=2300 width=174) + Filter Operator [FIL_123] (rows=2300 width=174) predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) - TableScan [TS_9] (rows=2300 width=174) + TableScan [TS_12] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_102] (rows=1441769 width=4) - Conds:RS_18._col0=RS_118._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + Merge Join Operator [MERGEJOIN_103] (rows=1441769 width=4) + Conds:RS_18._col0=RS_122._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_117] (rows=652 width=4) + Select Operator [SEL_121] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_116] (rows=652 width=8) + Filter Operator [FIL_120] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=8) + TableScan [TS_9] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_101] (rows=4037893 width=4) - Conds:RS_115._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_102] (rows=4037893 width=4) + Conds:RS_119._col2=RS_111._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_106] (rows=14776 width=4) + Select Operator [SEL_110] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_105] (rows=14776 width=268) + Filter Operator [FIL_109] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=268) + TableScan [TS_6] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_119] PartitionCols:_col2 - Select Operator [SEL_114] (rows=501686735 width=340) + Select Operator [SEL_118] (rows=501686735 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_113] (rows=501686735 width=340) + Filter Operator [FIL_117] (rows=501686735 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_item_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=340) + TableScan [TS_3] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_110] - Group By Operator [GBY_109] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=14776 width=4) + Select Operator [SEL_112] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] + Please refer to the previous Select Operator [SEL_110] diff --git a/ql/src/test/results/clientpositive/perf/tez/query71.q.out b/ql/src/test/results/clientpositive/perf/tez/query71.q.out index dfcc81f7bf..16bfdcd711 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query71.q.out @@ -91,171 +91,171 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 9 (BROADCAST_EDGE) -Map 10 <- Reducer 13 (BROADCAST_EDGE) -Map 14 <- Reducer 17 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE), Union 3 (CONTAINS) -Reducer 4 <- Map 18 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) -Reducer 5 <- Map 19 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Map 11 <- Reducer 14 (BROADCAST_EDGE) +Map 15 <- Reducer 18 (BROADCAST_EDGE) +Map 5 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 8 <- Map 19 (SIMPLE_EDGE), Union 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_174] - Select Operator [SEL_173] (rows=1991967 width=223) + Reducer 4 vectorized + File Output Operator [FS_164] + Select Operator [SEL_163] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] - Select Operator [SEL_171] (rows=1991967 width=227) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_162] + Select Operator [SEL_161] (rows=1991967 width=227) Output:["_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_170] (rows=1991967 width=223) + Group By Operator [GBY_160] (rows=1991967 width=223) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_46] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_45] (rows=1991967 width=223) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col0)"],keys:_col4, _col7, _col8, _col5 - Merge Join Operator [MERGEJOIN_140] (rows=1991967 width=112) - Conds:RS_41._col2=RS_169._col0(Inner),Output:["_col0","_col4","_col5","_col7","_col8"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_169] + Group By Operator [GBY_46] (rows=1991967 width=223) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)"],keys:_col7, _col1, _col2, _col8 + Merge Join Operator [MERGEJOIN_141] (rows=1991967 width=112) + Conds:RS_156._col0=RS_43._col2(Inner),Output:["_col1","_col2","_col3","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] PartitionCols:_col0 - Select Operator [SEL_168] (rows=43200 width=12) + Select Operator [SEL_155] (rows=43200 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_167] (rows=43200 width=99) + Filter Operator [FIL_154] (rows=43200 width=99) predicate:((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) - TableScan [TS_35] (rows=86400 width=99) + TableScan [TS_0] (rows=86400 width=99) default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_41] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_139] (rows=3983933 width=104) - Conds:Union 3._col1=RS_166._col0(Inner),Output:["_col0","_col2","_col4","_col5"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] + Merge Join Operator [MERGEJOIN_140] (rows=3983933 width=104) + Conds:Union 7._col1=RS_159._col0(Inner),Output:["_col0","_col2","_col4","_col5"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] PartitionCols:_col0 - Select Operator [SEL_165] (rows=7333 width=107) + Select Operator [SEL_158] (rows=7333 width=107) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_164] (rows=7333 width=111) + Filter Operator [FIL_157] (rows=7333 width=111) predicate:((i_manager_id = 1) and i_item_sk is not null) - TableScan [TS_32] (rows=462000 width=111) + TableScan [TS_35] (rows=462000 width=111) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_brand","i_manager_id"] - <-Union 3 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] - Reduce Output Operator [RS_148] + <-Union 7 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] + Reduce Output Operator [RS_149] PartitionCols:_col1 - Select Operator [SEL_146] (rows=7751851 width=98) + Select Operator [SEL_147] (rows=7751851 width=98) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_145] (rows=7751851 width=98) - Conds:RS_185._col0=RS_177._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_177] + Merge Join Operator [MERGEJOIN_146] (rows=7751851 width=98) + Conds:RS_186._col0=RS_178._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_178] PartitionCols:_col0 - Select Operator [SEL_176] (rows=50 width=4) + Select Operator [SEL_177] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_175] (rows=50 width=12) + Filter Operator [FIL_176] (rows=50 width=12) predicate:((d_year = 2001) and (d_moy = 12) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=12) + TableScan [TS_16] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] PartitionCols:_col0 - Select Operator [SEL_184] (rows=285116947 width=123) + Select Operator [SEL_185] (rows=285116947 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_183] (rows=285116947 width=123) - predicate:(cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_17_date_dim_d_date_sk_min) AND DynamicValue(RS_17_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_17_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=287989836 width=123) + Filter Operator [FIL_184] (rows=285116947 width=123) + predicate:(cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_13] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_182] - Group By Operator [GBY_181] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_180] - Group By Operator [GBY_179] (rows=1 width=12) + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_181] + Group By Operator [GBY_180] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_178] (rows=50 width=4) + Select Operator [SEL_179] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_176] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_152] + Please refer to the previous Select Operator [SEL_177] + <-Reducer 16 [CONTAINS] + Reduce Output Operator [RS_153] PartitionCols:_col1 - Select Operator [SEL_150] (rows=14384397 width=4) + Select Operator [SEL_151] (rows=14384397 width=4) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_149] (rows=14384397 width=4) - Conds:RS_196._col0=RS_188._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_188] + Merge Join Operator [MERGEJOIN_150] (rows=14384397 width=4) + Conds:RS_197._col0=RS_189._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_187] (rows=50 width=4) + Select Operator [SEL_188] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_186] (rows=50 width=12) + Filter Operator [FIL_187] (rows=50 width=12) predicate:((d_year = 2001) and (d_moy = 12) and d_date_sk is not null) - TableScan [TS_24] (rows=73049 width=12) + TableScan [TS_27] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] PartitionCols:_col0 - Select Operator [SEL_195] (rows=525325345 width=118) + Select Operator [SEL_196] (rows=525325345 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_194] (rows=525325345 width=118) - predicate:(ss_sold_date_sk is not null and ss_sold_time_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_21] (rows=575995635 width=118) + Filter Operator [FIL_195] (rows=525325345 width=118) + predicate:(ss_sold_date_sk is not null and ss_sold_time_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_sold_time_sk","ss_item_sk","ss_ext_sales_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_193] - Group By Operator [GBY_192] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_194] + Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_191] - Group By Operator [GBY_190] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_192] + Group By Operator [GBY_191] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_189] (rows=50 width=4) + Select Operator [SEL_190] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_187] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_144] + Please refer to the previous Select Operator [SEL_188] + <-Reducer 6 [CONTAINS] + Reduce Output Operator [RS_145] PartitionCols:_col1 - Select Operator [SEL_142] (rows=3941098 width=118) + Select Operator [SEL_143] (rows=3941098 width=118) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_141] (rows=3941098 width=118) - Conds:RS_163._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] + Merge Join Operator [MERGEJOIN_142] (rows=3941098 width=118) + Conds:RS_175._col0=RS_167._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_167] PartitionCols:_col0 - Select Operator [SEL_154] (rows=50 width=4) + Select Operator [SEL_166] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_153] (rows=50 width=12) + Filter Operator [FIL_165] (rows=50 width=12) predicate:((d_year = 2001) and (d_moy = 12) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=12) + TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_175] PartitionCols:_col0 - Select Operator [SEL_162] (rows=143930836 width=123) + Select Operator [SEL_174] (rows=143930836 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_161] (rows=143930836 width=123) - predicate:(ws_sold_time_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=123) + Filter Operator [FIL_173] (rows=143930836 width=123) + predicate:(ws_sold_time_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_item_sk","ws_ext_sales_price"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_160] - Group By Operator [GBY_159] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_156] (rows=50 width=4) + Select Operator [SEL_168] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_154] + Please refer to the previous Select Operator [SEL_166] diff --git a/ql/src/test/results/clientpositive/perf/tez/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/query72.q.out index 8383a401c2..a47244102c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query72.q.out @@ -81,16 +81,16 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 19 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 21 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Map 13 <- Reducer 19 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 21 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 22 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 24 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) @@ -101,186 +101,186 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_297] - Limit [LIM_296] (rows=100 width=312) + File Output Operator [FS_301] + Limit [LIM_300] (rows=100 width=312) Number of rows:100 - Select Operator [SEL_295] (rows=384313734 width=312) + Select Operator [SEL_299] (rows=384313734 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_294] - Top N Key Operator [TNK_293] (rows=384313734 width=312) + SHUFFLE [RS_298] + Top N Key Operator [TNK_297] (rows=384313734 width=312) keys:_col5, _col0, _col1, _col2,top n:100 - Group By Operator [GBY_292] (rows=384313734 width=312) + Group By Operator [GBY_296] (rows=384313734 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_70] + SHUFFLE [RS_72] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_69] (rows=576990095 width=312) + Group By Operator [GBY_71] (rows=576990095 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_67] (rows=1488051228 width=292) + Select Operator [SEL_69] (rows=1488051228 width=292) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_253] (rows=1488051228 width=292) - Conds:RS_64._col4, _col6=RS_291._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] + Merge Join Operator [MERGEJOIN_257] (rows=1488051228 width=292) + Conds:RS_66._col4, _col6=RS_295._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_291] + SHUFFLE [RS_295] PartitionCols:_col0, _col1 - Select Operator [SEL_290] (rows=28798881 width=8) + Select Operator [SEL_294] (rows=28798881 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_289] (rows=28798881 width=8) + Filter Operator [FIL_293] (rows=28798881 width=8) predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_61] (rows=28798881 width=8) + TableScan [TS_63] (rows=28798881 width=8) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_64] + SHUFFLE [RS_66] PartitionCols:_col4, _col6 - Select Operator [SEL_60] (rows=576990095 width=300) + Select Operator [SEL_62] (rows=576990095 width=300) Output:["_col4","_col6","_col13","_col15","_col19","_col25"] - Merge Join Operator [MERGEJOIN_252] (rows=576990095 width=300) - Conds:RS_57._col0, _col19=RS_288._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col19","_col23"] + Merge Join Operator [MERGEJOIN_256] (rows=576990095 width=300) + Conds:RS_59._col0, _col19=RS_292._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col19","_col23"] <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] + SHUFFLE [RS_292] PartitionCols:_col0, _col1 - Select Operator [SEL_287] (rows=73049 width=8) + Select Operator [SEL_291] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_286] (rows=73049 width=8) + Filter Operator [FIL_290] (rows=73049 width=8) predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_47] (rows=73049 width=8) + TableScan [TS_49] (rows=73049 width=8) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_57] + SHUFFLE [RS_59] PartitionCols:_col0, _col19 - Filter Operator [FIL_56] (rows=516036043 width=311) + Filter Operator [FIL_58] (rows=516036043 width=311) predicate:(_col3 < _col17) - Merge Join Operator [MERGEJOIN_251] (rows=1548108131 width=311) - Conds:RS_53._col1=RS_54._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col19","_col23"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_54] + Merge Join Operator [MERGEJOIN_255] (rows=1548108131 width=311) + Conds:RS_55._col1=RS_56._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col19","_col23"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col8 - Select Operator [SEL_46] (rows=2712713 width=219) + Select Operator [SEL_48] (rows=2712713 width=219) Output:["_col3","_col8","_col10","_col11","_col13","_col17"] - Filter Operator [FIL_45] (rows=2712713 width=219) - predicate:(_col17 > _col10) - Merge Join Operator [MERGEJOIN_250] (rows=8138139 width=219) - Conds:RS_42._col1=RS_285._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col13","_col15","_col17"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] + Filter Operator [FIL_47] (rows=2712713 width=219) + predicate:(_col1 > _col14) + Merge Join Operator [MERGEJOIN_254] (rows=8138139 width=219) + Conds:RS_266._col0=RS_45._col3(Inner),Output:["_col1","_col3","_col8","_col10","_col11","_col13","_col14","_col17"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_266] PartitionCols:_col0 - Select Operator [SEL_284] (rows=73049 width=12) + Select Operator [SEL_265] (rows=73049 width=12) Output:["_col0","_col1"] - Filter Operator [FIL_283] (rows=73049 width=98) + Filter Operator [FIL_264] (rows=73049 width=98) predicate:(d_date_sk is not null and UDFToDouble(d_date) is not null) - TableScan [TS_24] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_249] (rows=8138139 width=214) - Conds:RS_39._col4=RS_282._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col13","_col15"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_253] (rows=8138139 width=214) + Conds:RS_269._col0=RS_41._col4(Inner),Output:["_col1","_col3","_col6","_col8","_col9","_col11","_col12","_col15"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_269] PartitionCols:_col0 - Select Operator [SEL_281] (rows=462000 width=188) + Select Operator [SEL_268] (rows=462000 width=188) Output:["_col0","_col1"] - Filter Operator [FIL_280] (rows=462000 width=188) + Filter Operator [FIL_267] (rows=462000 width=188) predicate:i_item_sk is not null - TableScan [TS_21] (rows=462000 width=188) + TableScan [TS_9] (rows=462000 width=188) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_39] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_41] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_248] (rows=8138139 width=30) - Conds:RS_36._col5=RS_279._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col13"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] + Merge Join Operator [MERGEJOIN_252] (rows=8138139 width=30) + Conds:RS_36._col5=RS_289._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col13"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_278] (rows=2300 width=4) + Select Operator [SEL_288] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_277] (rows=2300 width=4) + Filter Operator [FIL_287] (rows=2300 width=4) predicate:p_promo_sk is not null - TableScan [TS_18] (rows=2300 width=4) + TableScan [TS_24] (rows=2300 width=4) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_247] (rows=8138139 width=29) - Conds:RS_33._col3=RS_276._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_276] + Merge Join Operator [MERGEJOIN_251] (rows=8138139 width=29) + Conds:RS_33._col3=RS_286._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_275] (rows=1440 width=4) + Select Operator [SEL_285] (rows=1440 width=4) Output:["_col0"] - Filter Operator [FIL_274] (rows=1440 width=96) + Filter Operator [FIL_284] (rows=1440 width=96) predicate:((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) - TableScan [TS_15] (rows=7200 width=96) + TableScan [TS_21] (rows=7200 width=96) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_33] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_246] (rows=40690691 width=35) - Conds:RS_30._col2=RS_273._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_273] + Merge Join Operator [MERGEJOIN_250] (rows=40690691 width=35) + Conds:RS_30._col2=RS_283._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_283] PartitionCols:_col0 - Select Operator [SEL_272] (rows=265971 width=4) + Select Operator [SEL_282] (rows=265971 width=4) Output:["_col0"] - Filter Operator [FIL_271] (rows=265971 width=89) + Filter Operator [FIL_281] (rows=265971 width=89) predicate:((cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_12] (rows=1861800 width=89) + TableScan [TS_18] (rows=1861800 width=89) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 10 [SIMPLE_EDGE] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_245] (rows=99576237 width=39) - Conds:RS_270._col0=RS_262._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_262] + Merge Join Operator [MERGEJOIN_249] (rows=99576237 width=39) + Conds:RS_280._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 18 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_272] PartitionCols:_col0 - Select Operator [SEL_261] (rows=652 width=16) + Select Operator [SEL_271] (rows=652 width=16) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_260] (rows=652 width=106) + Filter Operator [FIL_270] (rows=652 width=106) predicate:((d_year = 2001) and d_date_sk is not null and d_week_seq is not null and UDFToDouble(d_date) is not null) - TableScan [TS_9] (rows=73049 width=106) + TableScan [TS_15] (rows=73049 width=106) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_270] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_280] PartitionCols:_col0 - Select Operator [SEL_269] (rows=280863798 width=31) + Select Operator [SEL_279] (rows=280863798 width=31) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_268] (rows=280863798 width=31) + Filter Operator [FIL_278] (rows=280863798 width=31) predicate:(cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_ship_date_sk is not null and cs_quantity is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_28_d1_d_date_sk_min) AND DynamicValue(RS_28_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_28_d1_d_date_sk_bloom_filter))) - TableScan [TS_6] (rows=287989836 width=31) + TableScan [TS_12] (rows=287989836 width=31) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_267] - Group By Operator [GBY_266] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_277] + Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_265] - Group By Operator [GBY_264] (rows=1 width=12) + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_275] + Group By Operator [GBY_274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_263] (rows=652 width=4) + Select Operator [SEL_273] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_261] + Please refer to the previous Select Operator [SEL_271] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_53] + SHUFFLE [RS_55] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_244] (rows=35703276 width=111) - Conds:RS_256._col2=RS_259._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_248] (rows=35703276 width=111) + Conds:RS_260._col2=RS_263._col0(Inner),Output:["_col0","_col1","_col3","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_256] + SHUFFLE [RS_260] PartitionCols:_col2 - Select Operator [SEL_255] (rows=35703276 width=15) + Select Operator [SEL_259] (rows=35703276 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_254] (rows=35703276 width=15) + Filter Operator [FIL_258] (rows=35703276 width=15) predicate:(inv_quantity_on_hand is not null and inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + SHUFFLE [RS_263] PartitionCols:_col0 - Select Operator [SEL_258] (rows=27 width=104) + Select Operator [SEL_262] (rows=27 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_257] (rows=27 width=104) + Filter Operator [FIL_261] (rows=27 width=104) predicate:w_warehouse_sk is not null TableScan [TS_3] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/query74.q.out index 4e92161c15..30f72bb92b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query74.q.out @@ -131,271 +131,271 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 23 (BROADCAST_EDGE) -Map 13 <- Reducer 25 (BROADCAST_EDGE) -Map 17 <- Reducer 22 (BROADCAST_EDGE) -Map 9 <- Reducer 24 (BROADCAST_EDGE) -Reducer 10 <- Map 21 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (SIMPLE_EDGE) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 24 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 26 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Map 14 <- Reducer 17 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 25 <- Reducer 21 (BROADCAST_EDGE) +Map 26 <- Reducer 23 (BROADCAST_EDGE) +Reducer 10 <- Map 1 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 19 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 21 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 16 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE) +Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_354] - Limit [LIM_353] (rows=100 width=280) + Reducer 5 vectorized + File Output Operator [FS_357] + Limit [LIM_356] (rows=100 width=280) Number of rows:100 - Select Operator [SEL_352] (rows=12248093 width=280) + Select Operator [SEL_355] (rows=12248093 width=280) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_93] - Select Operator [SEL_92] (rows=12248093 width=280) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_98] + Select Operator [SEL_97] (rows=12248093 width=280) Output:["_col0","_col1","_col2"] - Top N Key Operator [TNK_158] (rows=12248093 width=732) - keys:_col8, _col7, _col9,top n:100 - Filter Operator [FIL_91] (rows=12248093 width=732) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col10 / _col3))) ELSE (false) END) ELSE (false) END - Merge Join Operator [MERGEJOIN_288] (rows=24496186 width=732) - Conds:RS_88._col2=RS_351._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col7","_col8","_col9","_col10"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_351] + Top N Key Operator [TNK_163] (rows=12248093 width=732) + keys:_col1, _col0, _col2,top n:100 + Filter Operator [FIL_96] (rows=12248093 width=732) + predicate:CASE WHEN (_col7 is not null) THEN (CASE WHEN (_col10) THEN (((_col5 / _col9) > (_col3 / _col7))) ELSE (false) END) ELSE (false) END + Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=732) + Conds:RS_326._col0=RS_94._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col7","_col9","_col10"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_326] PartitionCols:_col0 - Group By Operator [GBY_350] (rows=80000000 width=392) + Group By Operator [GBY_325] (rows=80000000 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_79] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_78] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_285] (rows=187573258 width=377) - Conds:RS_74._col1=RS_318._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] + Group By Operator [GBY_17] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_282] (rows=187573258 width=377) + Conds:RS_294._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_317] (rows=80000000 width=284) + Select Operator [SEL_293] (rows=80000000 width=284) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_316] (rows=80000000 width=284) + Filter Operator [FIL_292] (rows=80000000 width=284) predicate:(c_customer_sk is not null and c_customer_id is not null) - TableScan [TS_68] (rows=80000000 width=284) + TableScan [TS_0] (rows=80000000 width=284) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_74] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_284] (rows=187573258 width=101) - Conds:RS_349._col0=RS_295._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_295] + Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=101) + Conds:RS_324._col0=RS_304._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_304] PartitionCols:_col0 - Select Operator [SEL_292] (rows=652 width=4) + Select Operator [SEL_301] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_289] (rows=652 width=8) + Filter Operator [FIL_298] (rows=652 width=8) predicate:((d_year = 2002) and (d_year) IN (2001, 2002) and d_date_sk is not null) - TableScan [TS_65] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col0 - Select Operator [SEL_348] (rows=525327388 width=114) + Select Operator [SEL_323] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_347] (rows=525327388 width=114) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_72_date_dim_d_date_sk_min) AND DynamicValue(RS_72_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_72_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_62] (rows=575995635 width=114) + Filter Operator [FIL_322] (rows=525327388 width=114) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_346] - Group By Operator [GBY_345] (rows=1 width=12) + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_321] + Group By Operator [GBY_320] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_307] - Group By Operator [GBY_303] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_316] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_296] (rows=652 width=4) + Select Operator [SEL_305] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_88] + Please refer to the previous Select Operator [SEL_301] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_94] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_287] (rows=20485011 width=440) - Conds:RS_85._col2=RS_344._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] - <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + Merge Join Operator [MERGEJOIN_290] (rows=20485011 width=440) + Conds:RS_89._col2=RS_354._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_354] PartitionCols:_col0 - Select Operator [SEL_343] (rows=17130654 width=216) + Select Operator [SEL_353] (rows=17130654 width=216) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_342] (rows=17130654 width=212) + Filter Operator [FIL_352] (rows=17130654 width=212) predicate:(_col3 > 0) - Select Operator [SEL_341] (rows=51391963 width=212) + Select Operator [SEL_351] (rows=51391963 width=212) Output:["_col0","_col3"] - Group By Operator [GBY_340] (rows=51391963 width=392) + Group By Operator [GBY_350] (rows=51391963 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_58] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_82] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_57] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=391) - Conds:RS_53._col1=RS_321._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + Group By Operator [GBY_81] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_288] (rows=51391963 width=391) + Conds:RS_297._col0=RS_78._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_297] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_53] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_78] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_282] (rows=51391963 width=115) - Conds:RS_339._col0=RS_301._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_301] + Merge Join Operator [MERGEJOIN_287] (rows=51391963 width=115) + Conds:RS_349._col0=RS_310._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_310] PartitionCols:_col0 - Select Operator [SEL_294] (rows=652 width=4) + Select Operator [SEL_303] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_291] (rows=652 width=8) + Filter Operator [FIL_300] (rows=652 width=8) predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + Please refer to the previous TableScan [TS_6] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_349] PartitionCols:_col0 - Select Operator [SEL_338] (rows=143930993 width=119) + Select Operator [SEL_348] (rows=143930993 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_337] (rows=143930993 width=119) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_41] (rows=144002668 width=119) + Filter Operator [FIL_347] (rows=143930993 width=119) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_67] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_336] - Group By Operator [GBY_335] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_346] + Group By Operator [GBY_345] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_310] - Group By Operator [GBY_306] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_319] + Group By Operator [GBY_315] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_302] (rows=652 width=4) + Select Operator [SEL_311] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_294] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_85] + Please refer to the previous Select Operator [SEL_303] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_89] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_286] (rows=31888273 width=324) - Conds:RS_324._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] + Merge Join Operator [MERGEJOIN_289] (rows=31888273 width=324) + Conds:RS_334._col0=RS_344._col0(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_344] PartitionCols:_col0 - Select Operator [SEL_333] (rows=26666666 width=212) + Select Operator [SEL_343] (rows=26666666 width=212) Output:["_col0","_col1"] - Filter Operator [FIL_332] (rows=26666666 width=212) + Filter Operator [FIL_342] (rows=26666666 width=212) predicate:(_col3 > 0) - Select Operator [SEL_331] (rows=80000000 width=212) + Select Operator [SEL_341] (rows=80000000 width=212) Output:["_col0","_col3"] - Group By Operator [GBY_330] (rows=80000000 width=392) + Group By Operator [GBY_340] (rows=80000000 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_60] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_36] (rows=80000000 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_281] (rows=187573258 width=377) - Conds:RS_32._col1=RS_320._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Group By Operator [GBY_59] (rows=80000000 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_286] (rows=187573258 width=377) + Conds:RS_296._col0=RS_56._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_296] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_280] (rows=187573258 width=101) - Conds:RS_329._col0=RS_299._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_299] + Merge Join Operator [MERGEJOIN_285] (rows=187573258 width=101) + Conds:RS_339._col0=RS_308._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_308] PartitionCols:_col0 - Select Operator [SEL_293] (rows=652 width=4) + Select Operator [SEL_302] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_290] (rows=652 width=8) + Filter Operator [FIL_299] (rows=652 width=8) predicate:((d_year = 2001) and (d_year) IN (2001, 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_65] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + Please refer to the previous TableScan [TS_6] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_339] PartitionCols:_col0 - Select Operator [SEL_328] (rows=525327388 width=114) + Select Operator [SEL_338] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_327] (rows=525327388 width=114) - predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_20] (rows=575995635 width=114) + Filter Operator [FIL_337] (rows=525327388 width=114) + predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_326] - Group By Operator [GBY_325] (rows=1 width=12) + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_336] + Group By Operator [GBY_335] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_309] - Group By Operator [GBY_305] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_318] + Group By Operator [GBY_314] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_300] (rows=652 width=4) + Select Operator [SEL_309] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_293] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + Please refer to the previous Select Operator [SEL_302] + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] PartitionCols:_col0 - Select Operator [SEL_323] (rows=51391963 width=212) + Select Operator [SEL_333] (rows=51391963 width=212) Output:["_col0","_col1"] - Group By Operator [GBY_322] (rows=51391963 width=392) + Group By Operator [GBY_332] (rows=51391963 width=392) Output:["_col0","_col1","_col2","_col3"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_39] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=51391963 width=392) - Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col2)"],keys:_col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=391) - Conds:RS_12._col1=RS_319._col0(Inner),Output:["_col2","_col5","_col6","_col7"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + Group By Operator [GBY_38] (rows=51391963 width=392) + Output:["_col0","_col1","_col2","_col3"],aggregations:["max(_col6)"],keys:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_284] (rows=51391963 width=391) + Conds:RS_295._col0=RS_35._col1(Inner),Output:["_col1","_col2","_col3","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_317] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Please refer to the previous Select Operator [SEL_293] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_35] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115) - Conds:RS_315._col0=RS_297._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] + Merge Join Operator [MERGEJOIN_283] (rows=51391963 width=115) + Conds:RS_331._col0=RS_306._col0(Inner),Output:["_col1","_col2"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_306] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_292] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + Please refer to the previous Select Operator [SEL_301] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] PartitionCols:_col0 - Select Operator [SEL_314] (rows=143930993 width=119) + Select Operator [SEL_330] (rows=143930993 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_313] (rows=143930993 width=119) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + Filter Operator [FIL_329] (rows=143930993 width=119) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_24] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_328] + Group By Operator [GBY_327] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_308] - Group By Operator [GBY_304] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_317] + Group By Operator [GBY_313] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=652 width=4) + Select Operator [SEL_307] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_292] + Please refer to the previous Select Operator [SEL_301] diff --git a/ql/src/test/results/clientpositive/perf/tez/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/query75.q.out index a9af09aec7..42f8647dcf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query75.q.out @@ -157,438 +157,438 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 39 <- Reducer 16 (BROADCAST_EDGE) -Map 41 <- Reducer 20 (BROADCAST_EDGE) -Map 43 <- Reducer 28 (BROADCAST_EDGE) -Map 44 <- Reducer 32 (BROADCAST_EDGE) -Map 45 <- Reducer 36 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 11 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) -Reducer 14 <- Map 37 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 40 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 16 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 11 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) -Reducer 18 <- Map 37 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 42 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 11 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) -Reducer 22 <- Map 37 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 38 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 27 <- Union 26 (SIMPLE_EDGE) -Reducer 28 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 11 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) -Reducer 3 <- Map 37 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 37 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 40 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE), Union 24 (CONTAINS) -Reducer 32 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 11 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) -Reducer 34 <- Map 37 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 35 <- Map 42 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 36 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Map 38 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 27 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Map 14 <- Reducer 18 (BROADCAST_EDGE) +Map 40 <- Reducer 22 (BROADCAST_EDGE) +Map 42 <- Reducer 26 (BROADCAST_EDGE) +Map 43 <- Reducer 29 (BROADCAST_EDGE) +Map 44 <- Reducer 33 (BROADCAST_EDGE) +Map 45 <- Reducer 37 (BROADCAST_EDGE) +Reducer 11 <- Union 10 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 13 <- Union 12 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 16 <- Map 38 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 20 <- Map 38 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 39 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 22 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 23 <- Map 17 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) +Reducer 24 <- Map 38 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 41 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 26 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 17 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) +Reducer 28 <- Map 38 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 17 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) +Reducer 31 <- Map 38 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 39 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE), Union 10 (CONTAINS) +Reducer 33 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 34 <- Map 17 (SIMPLE_EDGE), Map 45 (SIMPLE_EDGE) +Reducer 35 <- Map 38 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 36 <- Map 41 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 12 (CONTAINS) +Reducer 37 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 13 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE), Union 10 (CONTAINS) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_619] - Select Operator [SEL_618] (rows=100 width=160) + Reducer 8 vectorized + File Output Operator [FS_637] + Select Operator [SEL_636] (rows=100 width=160) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Limit [LIM_617] (rows=100 width=152) + Limit [LIM_635] (rows=100 width=152) Number of rows:100 - Select Operator [SEL_616] (rows=3422897230256 width=151) + Select Operator [SEL_634] (rows=3422897230256 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_175] - Select Operator [SEL_174] (rows=3422897230256 width=151) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_181] + Select Operator [SEL_180] (rows=3422897230256 width=151) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_264] (rows=3422897230256 width=255) + Top N Key Operator [TNK_270] (rows=3422897230256 width=255) keys:(_col10 - _col4),top n:100 - Filter Operator [FIL_173] (rows=3422897230256 width=255) + Filter Operator [FIL_179] (rows=3422897230256 width=255) predicate:((CAST( _col10 AS decimal(17,2)) / CAST( _col4 AS decimal(17,2))) < 0.9) - Merge Join Operator [MERGEJOIN_519] (rows=10268691690770 width=255) - Conds:RS_612._col0, _col1, _col2, _col3=RS_615._col0, _col1, _col2, _col3(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col10","_col11"] - <-Reducer 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_615] + Merge Join Operator [MERGEJOIN_537] (rows=10268691690770 width=255) + Conds:RS_630._col0, _col1, _col2, _col3=RS_633._col0, _col1, _col2, _col3(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col10","_col11"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_633] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_614] (rows=84235776 width=135) + Group By Operator [GBY_632] (rows=84235776 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 - Group By Operator [GBY_613] (rows=736356923 width=131) + Group By Operator [GBY_631] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 26 [SIMPLE_EDGE] - <-Reducer 25 [CONTAINS] vectorized + <-Union 12 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized Reduce Output Operator [RS_645] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_644] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_643] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 24 [SIMPLE_EDGE] - <-Reducer 23 [CONTAINS] - Reduce Output Operator [RS_543] + <-Union 10 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] + Reduce Output Operator [RS_570] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_542] (rows=621178955 width=131) + Group By Operator [GBY_569] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_540] (rows=170474971 width=131) + Select Operator [SEL_567] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_539] (rows=170474971 width=234) - Conds:RS_103._col1, _col2=RS_606._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_606] + Merge Join Operator [MERGEJOIN_566] (rows=450703984 width=204) + Conds:RS_654._col0, _col1=RS_131._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_654] PartitionCols:_col0, _col1 - Select Operator [SEL_604] (rows=28798881 width=121) + Select Operator [SEL_652] (rows=57591150 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_603] (rows=28798881 width=121) - predicate:(cr_order_number is not null and cr_item_sk is not null) - TableScan [TS_9] (rows=28798881 width=121) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_103] + Filter Operator [FIL_651] (rows=57591150 width=119) + predicate:(sr_ticket_number is not null and sr_item_sk is not null) + TableScan [TS_23] (rows=57591150 width=119) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_131] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_511] (rows=96821196 width=138) - Conds:RS_100._col1=RS_600._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_600] + Merge Join Operator [MERGEJOIN_532] (rows=187186493 width=124) + Conds:RS_126._col1=RS_623._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_623] PartitionCols:_col0 - Select Operator [SEL_596] (rows=45745 width=19) + Select Operator [SEL_618] (rows=45745 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_595] (rows=45745 width=109) + Filter Operator [FIL_617] (rows=45745 width=109) predicate:((i_category = 'Sports') and i_manufact_id is not null and i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=109) + TableScan [TS_9] (rows=462000 width=109) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id","i_category","i_manufact_id"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_100] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_126] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_510] (rows=101592102 width=122) - Conds:RS_642._col0=RS_572._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_572] + Merge Join Operator [MERGEJOIN_531] (rows=196410188 width=109) + Conds:RS_668._col0=RS_596._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_596] PartitionCols:_col0 - Select Operator [SEL_563] (rows=652 width=4) + Select Operator [SEL_586] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_559] (rows=652 width=8) + Filter Operator [FIL_582] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_642] + <-Map 44 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_668] PartitionCols:_col0 - Select Operator [SEL_641] (rows=286549727 width=127) + Select Operator [SEL_667] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_640] (rows=286549727 width=127) - predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_98_date_dim_d_date_sk_min) AND DynamicValue(RS_98_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_98_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_85] (rows=287989836 width=127) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] - <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_639] - Group By Operator [GBY_638] (rows=1 width=12) + Filter Operator [FIL_666] (rows=550076554 width=122) + predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_124_date_dim_d_date_sk_min) AND DynamicValue(RS_124_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_124_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_114] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_665] + Group By Operator [GBY_664] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_587] - Group By Operator [GBY_581] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_610] + Group By Operator [GBY_604] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_573] (rows=652 width=4) + Select Operator [SEL_597] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_563] - <-Reducer 31 [CONTAINS] - Reduce Output Operator [RS_552] + Please refer to the previous Select Operator [SEL_586] + <-Reducer 9 [CONTAINS] + Reduce Output Operator [RS_551] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_551] (rows=621178955 width=131) + Group By Operator [GBY_550] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_549] (rows=450703984 width=131) + Select Operator [SEL_548] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_548] (rows=450703984 width=204) - Conds:RS_125._col1, _col2=RS_628._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_628] + Merge Join Operator [MERGEJOIN_547] (rows=170474971 width=234) + Conds:RS_579._col0, _col1=RS_108._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_579] PartitionCols:_col0, _col1 - Select Operator [SEL_626] (rows=57591150 width=119) + Select Operator [SEL_577] (rows=28798881 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_625] (rows=57591150 width=119) - predicate:(sr_ticket_number is not null and sr_item_sk is not null) - TableScan [TS_31] (rows=57591150 width=119) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_125] + Filter Operator [FIL_576] (rows=28798881 width=121) + predicate:(cr_order_number is not null and cr_item_sk is not null) + TableScan [TS_0] (rows=28798881 width=121) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_108] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_514] (rows=187186493 width=124) - Conds:RS_122._col1=RS_601._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_601] + Merge Join Operator [MERGEJOIN_529] (rows=96821196 width=138) + Conds:RS_103._col1=RS_622._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_622] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_596] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_122] + Please refer to the previous Select Operator [SEL_618] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_103] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_513] (rows=196410188 width=109) - Conds:RS_650._col0=RS_574._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_574] + Merge Join Operator [MERGEJOIN_528] (rows=101592102 width=122) + Conds:RS_642._col0=RS_594._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_594] PartitionCols:_col0 - Select Operator [SEL_564] (rows=652 width=4) + Select Operator [SEL_585] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_560] (rows=652 width=8) + Filter Operator [FIL_581] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 44 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_650] + Please refer to the previous TableScan [TS_6] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_642] PartitionCols:_col0 - Select Operator [SEL_649] (rows=550076554 width=122) + Select Operator [SEL_641] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_648] (rows=550076554 width=122) - predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_120_date_dim_d_date_sk_min) AND DynamicValue(RS_120_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_120_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_107] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_647] - Group By Operator [GBY_646] (rows=1 width=12) + Filter Operator [FIL_640] (rows=286549727 width=127) + predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_101_date_dim_d_date_sk_min) AND DynamicValue(RS_101_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_101_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_91] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + <-Reducer 29 [BROADCAST_EDGE] vectorized + BROADCAST [RS_639] + Group By Operator [GBY_638] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_588] - Group By Operator [GBY_582] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_609] + Group By Operator [GBY_603] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_575] (rows=652 width=4) + Select Operator [SEL_595] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_564] - <-Reducer 35 [CONTAINS] - Reduce Output Operator [RS_557] + Please refer to the previous Select Operator [SEL_585] + <-Reducer 36 [CONTAINS] + Reduce Output Operator [RS_575] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_556] (rows=736356923 width=131) + Group By Operator [GBY_574] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_554] (rows=115177968 width=131) + Select Operator [SEL_572] (rows=115177968 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_553] (rows=115177968 width=220) - Conds:RS_154._col1, _col2=RS_637._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_637] + Merge Join Operator [MERGEJOIN_571] (rows=115177968 width=220) + Conds:RS_663._col0, _col1=RS_161._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_663] PartitionCols:_col0, _col1 - Select Operator [SEL_635] (rows=14398467 width=118) + Select Operator [SEL_661] (rows=14398467 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_634] (rows=14398467 width=118) + Filter Operator [FIL_660] (rows=14398467 width=118) predicate:(wr_order_number is not null and wr_item_sk is not null) - TableScan [TS_60] (rows=14398467 width=118) + TableScan [TS_53] (rows=14398467 width=118) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_154] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_161] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_517] (rows=48990732 width=139) - Conds:RS_151._col1=RS_602._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_602] + Merge Join Operator [MERGEJOIN_535] (rows=48990732 width=139) + Conds:RS_156._col1=RS_624._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_624] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_596] - <-Reducer 33 [SIMPLE_EDGE] - SHUFFLE [RS_151] + Please refer to the previous Select Operator [SEL_618] + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_156] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_516] (rows=51404771 width=123) - Conds:RS_655._col0=RS_576._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_576] + Merge Join Operator [MERGEJOIN_534] (rows=51404771 width=123) + Conds:RS_673._col0=RS_598._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_598] PartitionCols:_col0 - Select Operator [SEL_565] (rows=652 width=4) + Select Operator [SEL_587] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_561] (rows=652 width=8) + Filter Operator [FIL_583] (rows=652 width=8) predicate:((d_year = 2002) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Please refer to the previous TableScan [TS_6] <-Map 45 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_655] + SHUFFLE [RS_673] PartitionCols:_col0 - Select Operator [SEL_654] (rows=143966864 width=127) + Select Operator [SEL_672] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_653] (rows=143966864 width=127) - predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_149_date_dim_d_date_sk_min) AND DynamicValue(RS_149_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_149_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_136] (rows=144002668 width=127) + Filter Operator [FIL_671] (rows=143966864 width=127) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_154_date_dim_d_date_sk_min) AND DynamicValue(RS_154_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_154_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_144] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_652] - Group By Operator [GBY_651] (rows=1 width=12) + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_670] + Group By Operator [GBY_669] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_589] - Group By Operator [GBY_583] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_611] + Group By Operator [GBY_605] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_577] (rows=652 width=4) + Select Operator [SEL_599] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_565] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_612] + Please refer to the previous Select Operator [SEL_587] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_630] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_611] (rows=84235776 width=135) + Group By Operator [GBY_629] (rows=84235776 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3 - Group By Operator [GBY_610] (rows=736356923 width=131) + Group By Operator [GBY_628] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 19 [CONTAINS] - Reduce Output Operator [RS_538] + <-Union 5 [SIMPLE_EDGE] + <-Reducer 25 [CONTAINS] + Reduce Output Operator [RS_565] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_537] (rows=736356923 width=131) + Group By Operator [GBY_564] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_535] (rows=115177968 width=131) + Select Operator [SEL_562] (rows=115177968 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_534] (rows=115177968 width=220) - Conds:RS_69._col1, _col2=RS_636._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_636] + Merge Join Operator [MERGEJOIN_561] (rows=115177968 width=220) + Conds:RS_662._col0, _col1=RS_73._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 41 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_662] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_635] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_69] + Please refer to the previous Select Operator [SEL_661] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_73] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_508] (rows=48990732 width=139) - Conds:RS_66._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_599] + Merge Join Operator [MERGEJOIN_526] (rows=48990732 width=139) + Conds:RS_68._col1=RS_621._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_621] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_596] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_66] + Please refer to the previous Select Operator [SEL_618] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_68] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_507] (rows=51404771 width=123) - Conds:RS_633._col0=RS_570._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_570] + Merge Join Operator [MERGEJOIN_525] (rows=51404771 width=123) + Conds:RS_659._col0=RS_592._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_592] PartitionCols:_col0 - Select Operator [SEL_562] (rows=652 width=4) + Select Operator [SEL_584] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_558] (rows=652 width=8) + Filter Operator [FIL_580] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_633] + Please refer to the previous TableScan [TS_6] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_659] PartitionCols:_col0 - Select Operator [SEL_632] (rows=143966864 width=127) + Select Operator [SEL_658] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_631] (rows=143966864 width=127) - predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_64_date_dim_d_date_sk_min) AND DynamicValue(RS_64_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_64_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_51] (rows=144002668 width=127) + Filter Operator [FIL_657] (rows=143966864 width=127) + predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_66_date_dim_d_date_sk_min) AND DynamicValue(RS_66_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_66_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_56] (rows=144002668 width=127) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_ext_sales_price"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_630] - Group By Operator [GBY_629] (rows=1 width=12) + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_656] + Group By Operator [GBY_655] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_586] - Group By Operator [GBY_580] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_608] + Group By Operator [GBY_602] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_571] (rows=652 width=4) + Select Operator [SEL_593] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_562] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_609] + Please refer to the previous Select Operator [SEL_584] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_627] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_608] (rows=736356923 width=131) + Group By Operator [GBY_626] (rows=736356923 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_607] (rows=621178955 width=131) + Group By Operator [GBY_625] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_533] + <-Union 3 [SIMPLE_EDGE] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_542] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_532] (rows=621178955 width=131) + Group By Operator [GBY_541] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_530] (rows=450703984 width=131) + Select Operator [SEL_539] (rows=170474971 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_529] (rows=450703984 width=204) - Conds:RS_40._col1, _col2=RS_627._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_627] + Merge Join Operator [MERGEJOIN_538] (rows=170474971 width=234) + Conds:RS_578._col0, _col1=RS_20._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_578] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_626] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_40] + Please refer to the previous Select Operator [SEL_577] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_505] (rows=187186493 width=124) - Conds:RS_37._col1=RS_598._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_598] + Merge Join Operator [MERGEJOIN_520] (rows=96821196 width=138) + Conds:RS_15._col1=RS_619._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_619] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_596] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_37] + Please refer to the previous Select Operator [SEL_618] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_504] (rows=196410188 width=109) - Conds:RS_624._col0=RS_568._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_568] + Merge Join Operator [MERGEJOIN_519] (rows=101592102 width=122) + Conds:RS_616._col0=RS_588._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_588] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_562] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_624] + Please refer to the previous Select Operator [SEL_584] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_616] PartitionCols:_col0 - Select Operator [SEL_623] (rows=550076554 width=122) + Select Operator [SEL_615] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_622] (rows=550076554 width=122) - predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_22] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_621] - Group By Operator [GBY_620] (rows=1 width=12) + Filter Operator [FIL_614] (rows=286549727 width=127) + predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_3] (rows=287989836 width=127) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_613] + Group By Operator [GBY_612] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_585] - Group By Operator [GBY_579] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_606] + Group By Operator [GBY_600] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_569] (rows=652 width=4) + Select Operator [SEL_589] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_562] - <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_524] + Please refer to the previous Select Operator [SEL_584] + <-Reducer 21 [CONTAINS] + Reduce Output Operator [RS_560] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_523] (rows=621178955 width=131) + Group By Operator [GBY_559] (rows=621178955 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_521] (rows=170474971 width=131) + Select Operator [SEL_557] (rows=450703984 width=131) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_520] (rows=170474971 width=234) - Conds:RS_18._col1, _col2=RS_605._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_605] + Merge Join Operator [MERGEJOIN_556] (rows=450703984 width=204) + Conds:RS_653._col0, _col1=RS_43._col1, _col2(Right Outer),Output:["_col2","_col3","_col7","_col8","_col11","_col12","_col13","_col14"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_653] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_604] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + Please refer to the previous Select Operator [SEL_652] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_502] (rows=96821196 width=138) - Conds:RS_15._col1=RS_597._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] - <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_597] + Merge Join Operator [MERGEJOIN_523] (rows=187186493 width=124) + Conds:RS_38._col1=RS_620._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_620] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_596] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] + Please refer to the previous Select Operator [SEL_618] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_501] (rows=101592102 width=122) - Conds:RS_594._col0=RS_566._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 11 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_566] + Merge Join Operator [MERGEJOIN_522] (rows=196410188 width=109) + Conds:RS_650._col0=RS_590._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_590] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_562] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_594] + Please refer to the previous Select Operator [SEL_584] + <-Map 40 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_650] PartitionCols:_col0 - Select Operator [SEL_593] (rows=286549727 width=127) + Select Operator [SEL_649] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_592] (rows=286549727 width=127) - predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=287989836 width=127) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_ext_sales_price"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_591] - Group By Operator [GBY_590] (rows=1 width=12) + Filter Operator [FIL_648] (rows=550076554 width=122) + predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_36_date_dim_d_date_sk_min) AND DynamicValue(RS_36_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_26] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_ext_sales_price"] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_647] + Group By Operator [GBY_646] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_584] - Group By Operator [GBY_578] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_607] + Group By Operator [GBY_601] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_567] (rows=652 width=4) + Select Operator [SEL_591] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_562] + Please refer to the previous Select Operator [SEL_584] diff --git a/ql/src/test/results/clientpositive/perf/tez/query76.q.out b/ql/src/test/results/clientpositive/perf/tez/query76.q.out index deecf746d3..ad5856dfef 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query76.q.out @@ -59,147 +59,147 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Union 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 8 <- Map 14 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 10 <- Map 15 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 11 <- Map 16 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 4 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 13 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 3 (CONTAINS) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_188] - Limit [LIM_187] (rows=100 width=408) + Reducer 5 vectorized + File Output Operator [FS_189] + Limit [LIM_188] (rows=100 width=408) Number of rows:100 - Select Operator [SEL_186] (rows=5600 width=408) + Select Operator [SEL_187] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] - Group By Operator [GBY_184] (rows=5600 width=408) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + Group By Operator [GBY_185] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 4 [SIMPLE_EDGE] - <-Reducer 10 [CONTAINS] - Reduce Output Operator [RS_172] + <-Union 3 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] + Reduce Output Operator [RS_173] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_171] (rows=224000 width=408) + Group By Operator [GBY_172] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_170] (rows=26219002 width=388) + Top N Key Operator [TNK_171] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_168] (rows=1433911 width=399) + Select Operator [SEL_169] (rows=1433911 width=399) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_167] (rows=1433911 width=209) - Conds:RS_45._col0=RS_200._col0(Inner),Output:["_col2","_col4","_col6","_col7"] + Merge Join Operator [MERGEJOIN_168] (rows=1433911 width=209) + Conds:RS_46._col0=RS_201._col0(Inner),Output:["_col2","_col4","_col6","_col7"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_199] (rows=73049 width=12) + Select Operator [SEL_200] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_198] (rows=73049 width=12) + Filter Operator [FIL_199] (rows=73049 width=12) predicate:d_date_sk is not null - TableScan [TS_39] (rows=73049 width=12) + TableScan [TS_40] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_45] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_153] (rows=1433911 width=205) - Conds:RS_197._col1=RS_177._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_177] + Merge Join Operator [MERGEJOIN_154] (rows=1433911 width=205) + Conds:RS_198._col1=RS_181._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_181] PartitionCols:_col0 - Select Operator [SEL_174] (rows=462000 width=94) + Select Operator [SEL_178] (rows=462000 width=94) Output:["_col0","_col1"] - Filter Operator [FIL_173] (rows=462000 width=94) + Filter Operator [FIL_177] (rows=462000 width=94) predicate:i_item_sk is not null - TableScan [TS_0] (rows=462000 width=94) + TableScan [TS_3] (rows=462000 width=94) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_197] + SHUFFLE [RS_198] PartitionCols:_col1 - Select Operator [SEL_196] (rows=1433911 width=119) + Select Operator [SEL_197] (rows=1433911 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_195] (rows=1433911 width=123) + Filter Operator [FIL_196] (rows=1433911 width=123) predicate:(cs_warehouse_sk is null and cs_sold_date_sk is not null and cs_item_sk is not null) - TableScan [TS_33] (rows=287989836 width=123) + TableScan [TS_34] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 3 [CONTAINS] - Reduce Output Operator [RS_160] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_161] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_159] (rows=224000 width=408) + Group By Operator [GBY_160] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_158] (rows=26219002 width=388) + Top N Key Operator [TNK_159] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_156] (rows=24749363 width=387) + Select Operator [SEL_157] (rows=24749363 width=387) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_155] (rows=24749363 width=204) - Conds:RS_12._col2=RS_183._col0(Inner),Output:["_col1","_col4","_col6","_col7"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_183] + Merge Join Operator [MERGEJOIN_156] (rows=24749363 width=204) + Conds:RS_176._col0=RS_14._col2(Inner),Output:["_col1","_col2","_col4","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_176] PartitionCols:_col0 - Select Operator [SEL_182] (rows=73049 width=12) + Select Operator [SEL_175] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_181] (rows=73049 width=12) + Filter Operator [FIL_174] (rows=73049 width=12) predicate:d_date_sk is not null - TableScan [TS_6] (rows=73049 width=12) + TableScan [TS_0] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_149] (rows=24749363 width=200) - Conds:RS_175._col0=RS_180._col1(Inner),Output:["_col1","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] + Merge Join Operator [MERGEJOIN_150] (rows=24749363 width=200) + Conds:RS_179._col0=RS_184._col1(Inner),Output:["_col1","_col2","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_179] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_174] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] + Please refer to the previous Select Operator [SEL_178] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_184] PartitionCols:_col1 - Select Operator [SEL_179] (rows=24749363 width=114) + Select Operator [SEL_183] (rows=24749363 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_178] (rows=24749363 width=118) + Filter Operator [FIL_182] (rows=24749363 width=118) predicate:(ss_addr_sk is null and ss_sold_date_sk is not null and ss_item_sk is not null) - TableScan [TS_3] (rows=575995635 width=118) + TableScan [TS_6] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 8 [CONTAINS] - Reduce Output Operator [RS_166] + <-Reducer 9 [CONTAINS] + Reduce Output Operator [RS_167] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_165] (rows=224000 width=408) + Group By Operator [GBY_166] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_164] (rows=26219002 width=388) + Top N Key Operator [TNK_165] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_162] (rows=35728 width=394) + Select Operator [SEL_163] (rows=35728 width=394) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_161] (rows=35728 width=209) - Conds:RS_28._col0=RS_194._col0(Inner),Output:["_col2","_col4","_col6","_col7"] + Merge Join Operator [MERGEJOIN_162] (rows=35728 width=209) + Conds:RS_29._col0=RS_195._col0(Inner),Output:["_col2","_col4","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_193] (rows=73049 width=12) + Select Operator [SEL_194] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_192] (rows=73049 width=12) + Filter Operator [FIL_193] (rows=73049 width=12) predicate:d_date_sk is not null - TableScan [TS_22] (rows=73049 width=12) + TableScan [TS_23] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_28] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_151] (rows=35728 width=205) - Conds:RS_191._col1=RS_176._col0(Inner),Output:["_col0","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_176] + Merge Join Operator [MERGEJOIN_152] (rows=35728 width=205) + Conds:RS_192._col1=RS_180._col0(Inner),Output:["_col0","_col2","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_180] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_174] + Please refer to the previous Select Operator [SEL_178] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_192] PartitionCols:_col1 - Select Operator [SEL_190] (rows=35728 width=119) + Select Operator [SEL_191] (rows=35728 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_189] (rows=35728 width=123) + Filter Operator [FIL_190] (rows=35728 width=123) predicate:(ws_web_page_sk is null and ws_sold_date_sk is not null and ws_item_sk is not null) - TableScan [TS_16] (rows=144002668 width=123) + TableScan [TS_17] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_ext_sales_price"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query77.q.out b/ql/src/test/results/clientpositive/perf/tez/query77.q.out index 484003835a..d9efd61d9b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[322][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[325][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain with ss as (select s_store_sk, @@ -236,7 +236,7 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 10 (BROADCAST_EDGE) Map 30 <- Reducer 17 (BROADCAST_EDGE) -Map 32 <- Reducer 24 (BROADCAST_EDGE) +Map 33 <- Reducer 24 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 11 <- Map 29 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 12 <- Map 28 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) @@ -248,13 +248,13 @@ Reducer 17 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 18 <- Map 31 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 32 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 21 <- Map 33 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 20 <- Map 33 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 21 <- Map 32 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Reducer 22 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 24 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 25 <- Map 34 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 26 <- Map 33 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 26 <- Map 32 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) Reducer 27 <- Reducer 26 (SIMPLE_EDGE) Reducer 3 <- Map 28 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) @@ -267,280 +267,280 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_372] - Limit [LIM_371] (rows=52 width=439) + File Output Operator [FS_375] + Limit [LIM_374] (rows=52 width=439) Number of rows:100 - Select Operator [SEL_370] (rows=52 width=439) + Select Operator [SEL_373] (rows=52 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_369] - Select Operator [SEL_368] (rows=52 width=439) + SHUFFLE [RS_372] + Select Operator [SEL_371] (rows=52 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_367] (rows=52 width=447) + Group By Operator [GBY_370] (rows=52 width=447) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_327] + Reduce Output Operator [RS_330] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_326] (rows=52 width=447) + Group By Operator [GBY_329] (rows=52 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_325] (rows=35 width=435) + Top N Key Operator [TNK_328] (rows=35 width=435) keys:_col0, _col1,top n:100 - Select Operator [SEL_323] (rows=2 width=439) + Select Operator [SEL_326] (rows=2 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_322] (rows=2 width=452) + Merge Join Operator [MERGEJOIN_325] (rows=2 width=452) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_379] - Group By Operator [GBY_378] (rows=2 width=228) + PARTITION_ONLY_SHUFFLE [RS_382] + Group By Operator [GBY_381] (rows=2 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_55] + SHUFFLE [RS_56] PartitionCols:_col0 - Group By Operator [GBY_54] (rows=56 width=227) + Group By Operator [GBY_55] (rows=56 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col1 - Merge Join Operator [MERGEJOIN_307] (rows=31836679 width=222) - Conds:RS_377._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_310] (rows=31836679 width=222) + Conds:RS_380._col0=RS_342._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + SHUFFLE [RS_342] PartitionCols:_col0 - Select Operator [SEL_335] (rows=8116 width=4) + Select Operator [SEL_338] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_334] (rows=8116 width=98) + Filter Operator [FIL_337] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_377] + SHUFFLE [RS_380] PartitionCols:_col0 - Select Operator [SEL_376] (rows=286549727 width=231) + Select Operator [SEL_379] (rows=286549727 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_375] (rows=286549727 width=231) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_44] (rows=287989836 width=231) + Filter Operator [FIL_378] (rows=286549727 width=231) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=287989836 width=231) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_374] - Group By Operator [GBY_373] (rows=1 width=12) + BROADCAST [RS_377] + Group By Operator [GBY_376] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] - Group By Operator [GBY_346] (rows=1 width=12) + SHUFFLE [RS_352] + Group By Operator [GBY_349] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_340] (rows=8116 width=4) + Select Operator [SEL_343] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_384] - Group By Operator [GBY_383] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_387] + Group By Operator [GBY_386] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_69] - Group By Operator [GBY_68] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_70] + Group By Operator [GBY_69] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(_col1)","sum(_col2)"] - Merge Join Operator [MERGEJOIN_308] (rows=3199657 width=183) - Conds:RS_382._col0=RS_341._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_311] (rows=3199657 width=183) + Conds:RS_385._col0=RS_344._col0(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] + SHUFFLE [RS_344] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_382] + SHUFFLE [RS_385] PartitionCols:_col0 - Select Operator [SEL_381] (rows=28798881 width=223) + Select Operator [SEL_384] (rows=28798881 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_380] (rows=28798881 width=223) + Filter Operator [FIL_383] (rows=28798881 width=223) predicate:cr_returned_date_sk is not null - TableScan [TS_58] (rows=28798881 width=223) + TableScan [TS_59] (rows=28798881 width=223) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] <-Reducer 23 [CONTAINS] - Reduce Output Operator [RS_333] + Reduce Output Operator [RS_336] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_332] (rows=52 width=447) + Group By Operator [GBY_335] (rows=52 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_331] (rows=35 width=435) + Top N Key Operator [TNK_334] (rows=35 width=435) keys:_col0, _col1,top n:100 - Select Operator [SEL_329] (rows=23 width=435) + Select Operator [SEL_332] (rows=23 width=435) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_328] (rows=23 width=452) - Conds:RS_395._col0=RS_400._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_331] (rows=23 width=452) + Conds:RS_398._col0=RS_403._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_395] + SHUFFLE [RS_398] PartitionCols:_col0 - Group By Operator [GBY_394] (rows=23 width=228) + Group By Operator [GBY_397] (rows=23 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_94] + SHUFFLE [RS_96] PartitionCols:_col0 - Group By Operator [GBY_93] (rows=345 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_310] (rows=15991254 width=227) - Conds:RS_89._col1=RS_392._col0(Inner),Output:["_col2","_col3","_col5"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_392] + Group By Operator [GBY_95] (rows=345 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0 + Merge Join Operator [MERGEJOIN_313] (rows=15991254 width=227) + Conds:RS_395._col0=RS_92._col1(Inner),Output:["_col0","_col3","_col4"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_395] PartitionCols:_col0 - Select Operator [SEL_391] (rows=4602 width=4) + Select Operator [SEL_394] (rows=4602 width=4) Output:["_col0"] - Filter Operator [FIL_390] (rows=4602 width=4) + Filter Operator [FIL_393] (rows=4602 width=4) predicate:wp_web_page_sk is not null - TableScan [TS_83] (rows=4602 width=4) + TableScan [TS_78] (rows=4602 width=4) default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_89] + SHUFFLE [RS_92] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_309] (rows=15991254 width=227) - Conds:RS_389._col0=RS_342._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_312] (rows=15991254 width=227) + Conds:RS_392._col0=RS_345._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] + SHUFFLE [RS_345] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_335] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_389] + Please refer to the previous Select Operator [SEL_338] + <-Map 33 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_392] PartitionCols:_col0 - Select Operator [SEL_388] (rows=143931136 width=231) + Select Operator [SEL_391] (rows=143931136 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_387] (rows=143931136 width=231) - predicate:(ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_77] (rows=144002668 width=231) + Filter Operator [FIL_390] (rows=143931136 width=231) + predicate:(ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_88_date_dim_d_date_sk_min) AND DynamicValue(RS_88_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_88_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_81] (rows=144002668 width=231) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_386] - Group By Operator [GBY_385] (rows=1 width=12) + BROADCAST [RS_389] + Group By Operator [GBY_388] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] - Group By Operator [GBY_347] (rows=1 width=12) + SHUFFLE [RS_353] + Group By Operator [GBY_350] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_343] (rows=8116 width=4) + Select Operator [SEL_346] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] <-Reducer 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_400] + SHUFFLE [RS_403] PartitionCols:_col0 - Group By Operator [GBY_399] (rows=23 width=228) + Group By Operator [GBY_402] (rows=23 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_114] + SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_113] (rows=23 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_312] (rows=1458758 width=137) - Conds:RS_109._col1=RS_393._col0(Inner),Output:["_col2","_col3","_col5"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_393] + Group By Operator [GBY_116] (rows=23 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0 + Merge Join Operator [MERGEJOIN_315] (rows=1458758 width=137) + Conds:RS_396._col0=RS_113._col1(Inner),Output:["_col0","_col3","_col4"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_396] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_391] + Please refer to the previous Select Operator [SEL_394] <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_109] + SHUFFLE [RS_113] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_311] (rows=1458758 width=135) - Conds:RS_398._col0=RS_344._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_314] (rows=1458758 width=135) + Conds:RS_401._col0=RS_347._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + SHUFFLE [RS_347] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_398] + SHUFFLE [RS_401] PartitionCols:_col0 - Select Operator [SEL_397] (rows=13129719 width=221) + Select Operator [SEL_400] (rows=13129719 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_396] (rows=13129719 width=221) + Filter Operator [FIL_399] (rows=13129719 width=221) predicate:(wr_web_page_sk is not null and wr_returned_date_sk is not null) - TableScan [TS_97] (rows=14398467 width=221) + TableScan [TS_102] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] - Reduce Output Operator [RS_321] + Reduce Output Operator [RS_324] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_320] (rows=52 width=447) + Group By Operator [GBY_323] (rows=52 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_319] (rows=35 width=435) + Top N Key Operator [TNK_322] (rows=35 width=435) keys:_col0, _col1,top n:100 - Select Operator [SEL_317] (rows=10 width=437) + Select Operator [SEL_320] (rows=10 width=437) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_316] (rows=10 width=452) - Conds:RS_361._col0=RS_366._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_319] (rows=10 width=452) + Conds:RS_364._col0=RS_369._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_366] + SHUFFLE [RS_369] PartitionCols:_col0 - Group By Operator [GBY_365] (rows=10 width=228) + Group By Operator [GBY_368] (rows=10 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_37] + SHUFFLE [RS_38] PartitionCols:_col0 - Group By Operator [GBY_36] (rows=40 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_306] (rows=5959021 width=157) - Conds:RS_32._col1=RS_359._col0(Inner),Output:["_col2","_col3","_col5"] + Group By Operator [GBY_37] (rows=40 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0 + Merge Join Operator [MERGEJOIN_309] (rows=5959021 width=157) + Conds:RS_362._col0=RS_34._col1(Inner),Output:["_col0","_col3","_col4"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_359] + SHUFFLE [RS_362] PartitionCols:_col0 - Select Operator [SEL_357] (rows=1704 width=4) + Select Operator [SEL_360] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_356] (rows=1704 width=4) + Filter Operator [FIL_359] (rows=1704 width=4) predicate:s_store_sk is not null TableScan [TS_6] (rows=1704 width=4) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_32] + SHUFFLE [RS_34] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=5959021 width=156) - Conds:RS_364._col0=RS_338._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_308] (rows=5959021 width=156) + Conds:RS_367._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] + SHUFFLE [RS_341] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_364] + SHUFFLE [RS_367] PartitionCols:_col0 - Select Operator [SEL_363] (rows=53634860 width=223) + Select Operator [SEL_366] (rows=53634860 width=223) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_362] (rows=53634860 width=223) + Filter Operator [FIL_365] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_20] (rows=57591150 width=223) + TableScan [TS_23] (rows=57591150 width=223) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_361] + SHUFFLE [RS_364] PartitionCols:_col0 - Group By Operator [GBY_360] (rows=10 width=228) + Group By Operator [GBY_363] (rows=10 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=320 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_304] (rows=58365993 width=137) - Conds:RS_12._col1=RS_358._col0(Inner),Output:["_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_307] (rows=58365993 width=137) + Conds:RS_12._col1=RS_361._col0(Inner),Output:["_col2","_col3","_col5"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_358] + SHUFFLE [RS_361] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_357] + Please refer to the previous Select Operator [SEL_360] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=58365993 width=135) - Conds:RS_355._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_306] (rows=58365993 width=135) + Conds:RS_358._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] + SHUFFLE [RS_339] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] + SHUFFLE [RS_358] PartitionCols:_col0 - Select Operator [SEL_354] (rows=525329897 width=221) + Select Operator [SEL_357] (rows=525329897 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_353] (rows=525329897 width=221) + Filter Operator [FIL_356] (rows=525329897 width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=221) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_352] - Group By Operator [GBY_351] (rows=1 width=12) + BROADCAST [RS_355] + Group By Operator [GBY_354] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - Group By Operator [GBY_345] (rows=1 width=12) + SHUFFLE [RS_351] + Group By Operator [GBY_348] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_337] (rows=8116 width=4) + Select Operator [SEL_340] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_335] + Please refer to the previous Select Operator [SEL_338] diff --git a/ql/src/test/results/clientpositive/perf/tez/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/query80.q.out index 7899482ff0..31dca0c6de 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query80.q.out @@ -217,333 +217,333 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 29 <- Reducer 19 (BROADCAST_EDGE) -Map 33 <- Reducer 25 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 12 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 27 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 32 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 19 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 12 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 27 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 36 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 25 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) +Map 18 <- Reducer 23 (BROADCAST_EDGE) +Map 30 <- Reducer 25 (BROADCAST_EDGE) +Map 33 <- Reducer 27 (BROADCAST_EDGE) +Reducer 10 <- Map 7 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 11 <- Map 28 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 29 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 14 <- Map 7 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 15 <- Map 28 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 36 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 20 <- Map 22 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 22 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 25 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 22 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 27 <- Map 22 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) -Reducer 4 <- Map 26 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 27 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 28 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 9 <- Union 8 (SIMPLE_EDGE) +Reducer 5 <- Union 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 9 <- Map 28 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_443] - Limit [LIM_442] (rows=100 width=619) + Reducer 6 vectorized + File Output Operator [FS_446] + Limit [LIM_445] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_441] (rows=59581 width=619) + Select Operator [SEL_444] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_440] - Select Operator [SEL_439] (rows=59581 width=619) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_443] + Select Operator [SEL_442] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_438] (rows=59581 width=627) + Group By Operator [GBY_441] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 8 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_459] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 13 [CONTAINS] vectorized + Reduce Output Operator [RS_462] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_458] (rows=59581 width=627) + Group By Operator [GBY_461] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_457] (rows=39721 width=618) + Top N Key Operator [TNK_460] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_456] (rows=38846 width=619) + Select Operator [SEL_459] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_455] (rows=38846 width=436) + Group By Operator [GBY_458] (rows=38846 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_75] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_79] PartitionCols:_col0 - Group By Operator [GBY_74] (rows=427306 width=436) + Group By Operator [GBY_78] (rows=427306 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_72] (rows=8592843 width=305) + Select Operator [SEL_76] (rows=8592843 width=305) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_374] (rows=8592843 width=305) - Conds:RS_69._col1=RS_454._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_454] + Merge Join Operator [MERGEJOIN_377] (rows=8592843 width=305) + Conds:RS_457._col0=RS_74._col2(Inner),Output:["_col1","_col8","_col9","_col12","_col13"] + <-Map 29 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_457] PartitionCols:_col0 - Select Operator [SEL_453] (rows=46000 width=104) + Select Operator [SEL_456] (rows=46000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_452] (rows=46000 width=104) + Filter Operator [FIL_455] (rows=46000 width=104) predicate:cp_catalog_page_sk is not null - TableScan [TS_54] (rows=46000 width=104) + TableScan [TS_41] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_373] (rows=8592843 width=208) - Conds:RS_66._col3=RS_428._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_428] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_74] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_376] (rows=8592843 width=208) + Conds:RS_69._col4=RS_434._col0(Inner),Output:["_col2","_col6","_col7","_col10","_col11"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_434] PartitionCols:_col0 - Select Operator [SEL_426] (rows=1150 width=4) + Select Operator [SEL_432] (rows=1150 width=4) Output:["_col0"] - Filter Operator [FIL_425] (rows=1150 width=89) + Filter Operator [FIL_431] (rows=1150 width=89) predicate:((p_channel_tv = 'N') and p_promo_sk is not null) - TableScan [TS_12] (rows=2300 width=89) + TableScan [TS_22] (rows=2300 width=89) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_tv"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_66] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_372] (rows=17185686 width=222) - Conds:RS_63._col2=RS_423._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_423] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_375] (rows=17185686 width=222) + Conds:RS_407._col0=RS_67._col2(Inner),Output:["_col2","_col4","_col6","_col7","_col10","_col11"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_407] PartitionCols:_col0 - Select Operator [SEL_421] (rows=154000 width=4) + Select Operator [SEL_405] (rows=154000 width=4) Output:["_col0"] - Filter Operator [FIL_420] (rows=154000 width=115) + Filter Operator [FIL_404] (rows=154000 width=115) predicate:((i_current_price > 50) and i_item_sk is not null) - TableScan [TS_9] (rows=462000 width=115) + TableScan [TS_3] (rows=462000 width=115) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_63] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_371] (rows=51557056 width=232) - Conds:RS_60._col0=RS_402._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_402] + Merge Join Operator [MERGEJOIN_374] (rows=51557056 width=232) + Conds:RS_59._col0=RS_413._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_413] PartitionCols:_col0 - Select Operator [SEL_399] (rows=8116 width=4) + Select Operator [SEL_410] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_398] (rows=8116 width=98) + Filter Operator [FIL_409] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=98) + TableScan [TS_12] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_60] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_59] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_370] (rows=464045263 width=326) - Conds:RS_448._col2, _col4=RS_451._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_448] + Merge Join Operator [MERGEJOIN_373] (rows=464045263 width=326) + Conds:RS_451._col2, _col4=RS_454._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 30 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_451] PartitionCols:_col2, _col4 - Select Operator [SEL_447] (rows=283691906 width=243) + Select Operator [SEL_450] (rows=283691906 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_446] (rows=283691906 width=243) - predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_39] (rows=287989836 width=243) + Filter Operator [FIL_449] (rows=283691906 width=243) + predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_60_date_dim_d_date_sk_min) AND DynamicValue(RS_60_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_60_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_47] (rows=287989836 width=243) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_445] - Group By Operator [GBY_444] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_448] + Group By Operator [GBY_447] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_410] - Group By Operator [GBY_407] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_421] + Group By Operator [GBY_418] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_403] (rows=8116 width=4) + Select Operator [SEL_414] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_399] - <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_451] + Please refer to the previous Select Operator [SEL_410] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_454] PartitionCols:_col0, _col1 - Select Operator [SEL_450] (rows=28798881 width=227) + Select Operator [SEL_453] (rows=28798881 width=227) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_449] (rows=28798881 width=227) + Filter Operator [FIL_452] (rows=28798881 width=227) predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_42] (rows=28798881 width=227) + TableScan [TS_50] (rows=28798881 width=227) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] - <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_475] + <-Reducer 17 [CONTAINS] vectorized + Reduce Output Operator [RS_478] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_474] (rows=59581 width=627) + Group By Operator [GBY_477] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_473] (rows=39721 width=618) + Top N Key Operator [TNK_476] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_472] (rows=53 width=615) + Select Operator [SEL_475] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_471] (rows=53 width=436) + Group By Operator [GBY_474] (rows=53 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_115] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_121] PartitionCols:_col0 - Group By Operator [GBY_114] (rows=318 width=436) + Group By Operator [GBY_120] (rows=318 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_112] (rows=4714659 width=323) + Select Operator [SEL_118] (rows=4714659 width=323) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_379] (rows=4714659 width=323) - Conds:RS_109._col2=RS_470._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_382] (rows=4714659 width=323) + Conds:RS_115._col4=RS_473._col0(Inner),Output:["_col7","_col8","_col11","_col12","_col15"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_470] + SHUFFLE [RS_473] PartitionCols:_col0 - Select Operator [SEL_469] (rows=84 width=104) + Select Operator [SEL_472] (rows=84 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_468] (rows=84 width=104) + Filter Operator [FIL_471] (rows=84 width=104) predicate:web_site_sk is not null - TableScan [TS_94] (rows=84 width=104) + TableScan [TS_109] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_109] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_378] (rows=4714659 width=227) - Conds:RS_106._col3=RS_429._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_429] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_115] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_381] (rows=4714659 width=227) + Conds:RS_435._col0=RS_113._col4(Inner),Output:["_col4","_col7","_col8","_col11","_col12"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_435] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_426] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_106] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_377] (rows=9429318 width=231) - Conds:RS_103._col1=RS_424._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_424] + Please refer to the previous Select Operator [SEL_432] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_113] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_380] (rows=9429318 width=231) + Conds:RS_408._col0=RS_106._col1(Inner),Output:["_col3","_col4","_col6","_col7","_col10","_col11"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_408] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_421] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_103] + Please refer to the previous Select Operator [SEL_405] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_106] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_376] (rows=28287952 width=235) - Conds:RS_100._col0=RS_404._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_404] + Merge Join Operator [MERGEJOIN_379] (rows=28287952 width=235) + Conds:RS_101._col0=RS_415._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_415] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_399] + Please refer to the previous Select Operator [SEL_410] <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_100] + SHUFFLE [RS_101] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_375] (rows=254608997 width=363) - Conds:RS_464._col1, _col4=RS_467._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_378] (rows=254608997 width=363) + Conds:RS_467._col1, _col4=RS_470._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_464] + SHUFFLE [RS_467] PartitionCols:_col1, _col4 - Select Operator [SEL_463] (rows=143894769 width=243) + Select Operator [SEL_466] (rows=143894769 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_462] (rows=143894769 width=243) - predicate:(ws_promo_sk is not null and ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_101_date_dim_d_date_sk_min) AND DynamicValue(RS_101_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_101_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_79] (rows=144002668 width=243) + Filter Operator [FIL_465] (rows=143894769 width=243) + predicate:(ws_promo_sk is not null and ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_102_date_dim_d_date_sk_min) AND DynamicValue(RS_102_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_102_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_89] (rows=144002668 width=243) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_461] - Group By Operator [GBY_460] (rows=1 width=12) + <-Reducer 27 [BROADCAST_EDGE] vectorized + BROADCAST [RS_464] + Group By Operator [GBY_463] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_411] - Group By Operator [GBY_408] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_422] + Group By Operator [GBY_419] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_405] (rows=8116 width=4) + Select Operator [SEL_416] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_399] + Please refer to the previous Select Operator [SEL_410] <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_467] + SHUFFLE [RS_470] PartitionCols:_col0, _col1 - Select Operator [SEL_466] (rows=14398467 width=221) + Select Operator [SEL_469] (rows=14398467 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_465] (rows=14398467 width=221) + Filter Operator [FIL_468] (rows=14398467 width=221) predicate:(wr_item_sk is not null and wr_order_number is not null) - TableScan [TS_82] (rows=14398467 width=221) + TableScan [TS_92] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] - <-Reducer 7 [CONTAINS] vectorized - Reduce Output Operator [RS_437] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_440] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_436] (rows=59581 width=627) + Group By Operator [GBY_439] (rows=59581 width=627) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_435] (rows=39721 width=618) + Top N Key Operator [TNK_438] (rows=39721 width=618) keys:_col0, _col1,top n:100 - Select Operator [SEL_434] (rows=822 width=617) + Select Operator [SEL_437] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_433] (rows=822 width=436) + Group By Operator [GBY_436] (rows=822 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_36] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_38] PartitionCols:_col0 - Group By Operator [GBY_35] (rows=4932 width=436) + Group By Operator [GBY_37] (rows=4932 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 - Select Operator [SEL_33] (rows=15038783 width=100) + Select Operator [SEL_35] (rows=15038783 width=100) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_369] (rows=15038783 width=100) - Conds:RS_30._col2=RS_432._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_432] + Merge Join Operator [MERGEJOIN_372] (rows=15038783 width=100) + Conds:RS_403._col0=RS_33._col3(Inner),Output:["_col1","_col8","_col9","_col12","_col13"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_403] PartitionCols:_col0 - Select Operator [SEL_431] (rows=1704 width=104) + Select Operator [SEL_402] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_430] (rows=1704 width=104) + Filter Operator [FIL_401] (rows=1704 width=104) predicate:s_store_sk is not null - TableScan [TS_15] (rows=1704 width=104) + TableScan [TS_0] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_368] (rows=15038783 width=0) - Conds:RS_27._col3=RS_427._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_427] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_371] (rows=15038783 width=0) + Conds:RS_28._col4=RS_433._col0(Inner),Output:["_col3","_col6","_col7","_col10","_col11"] + <-Map 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_433] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_426] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_367] (rows=30077566 width=57) - Conds:RS_24._col1=RS_422._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_422] + Please refer to the previous Select Operator [SEL_432] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_370] (rows=30077566 width=57) + Conds:RS_406._col0=RS_26._col1(Inner),Output:["_col3","_col4","_col6","_col7","_col10","_col11"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_406] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_421] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] + Please refer to the previous Select Operator [SEL_405] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_366] (rows=90232695 width=177) - Conds:RS_21._col0=RS_400._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_400] + Merge Join Operator [MERGEJOIN_369] (rows=90232695 width=177) + Conds:RS_18._col0=RS_411._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_411] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_399] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] + Please refer to the previous Select Operator [SEL_410] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_365] (rows=812149846 width=374) - Conds:RS_416._col1, _col4=RS_419._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_416] + Merge Join Operator [MERGEJOIN_368] (rows=812149846 width=374) + Conds:RS_427._col1, _col4=RS_430._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_427] PartitionCols:_col1, _col4 - Select Operator [SEL_415] (rows=501693263 width=233) + Select Operator [SEL_426] (rows=501693263 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_414] (rows=501693263 width=233) - predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=233) + Filter Operator [FIL_425] (rows=501693263 width=233) + predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_6] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_413] - Group By Operator [GBY_412] (rows=1 width=12) + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_424] + Group By Operator [GBY_423] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_409] - Group By Operator [GBY_406] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_420] + Group By Operator [GBY_417] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_401] (rows=8116 width=4) + Select Operator [SEL_412] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_399] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_419] + Please refer to the previous Select Operator [SEL_410] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_430] PartitionCols:_col0, _col1 - Select Operator [SEL_418] (rows=57591150 width=224) + Select Operator [SEL_429] (rows=57591150 width=224) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_417] (rows=57591150 width=224) + Filter Operator [FIL_428] (rows=57591150 width=224) predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_3] (rows=57591150 width=224) + TableScan [TS_9] (rows=57591150 width=224) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/query81.q.out index b96913f906..741e667315 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query81.q.out @@ -71,156 +71,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 10 <- Map 6 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 7 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 8 <- Map 15 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 11 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 vectorized - File Output Operator [FS_216] - Select Operator [SEL_215] (rows=100 width=1503) + File Output Operator [FS_218] + Select Operator [SEL_217] (rows=100 width=1503) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Limit [LIM_214] (rows=100 width=1417) + Limit [LIM_216] (rows=100 width=1417) Number of rows:100 - Select Operator [SEL_213] (rows=1609248 width=1416) + Select Operator [SEL_215] (rows=1609248 width=1416) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_66] - Select Operator [SEL_65] (rows=1609248 width=1416) + SHUFFLE [RS_68] + Select Operator [SEL_67] (rows=1609248 width=1416) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Top N Key Operator [TNK_105] (rows=1609248 width=1418) + Top N Key Operator [TNK_107] (rows=1609248 width=1418) keys:_col1, _col3, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col19,top n:100 - Merge Join Operator [MERGEJOIN_182] (rows=1609248 width=1418) - Conds:RS_62._col0=RS_63._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Merge Join Operator [MERGEJOIN_184] (rows=1609248 width=1418) + Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_58] (rows=1609248 width=227) + Merge Join Operator [MERGEJOIN_178] (rows=1568628 width=1310) + Conds:RS_187._col2=RS_190._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_187] + PartitionCols:_col2 + Select Operator [SEL_186] (rows=80000000 width=375) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_185] (rows=80000000 width=375) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=375) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=784314 width=941) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_188] (rows=784314 width=1027) + predicate:((ca_state = 'IL') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1027) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Select Operator [SEL_60] (rows=1609248 width=227) Output:["_col0","_col2"] - Filter Operator [FIL_57] (rows=1609248 width=227) + Filter Operator [FIL_59] (rows=1609248 width=227) predicate:(_col2 > _col3) - Merge Join Operator [MERGEJOIN_181] (rows=4827746 width=227) - Conds:RS_206._col1=RS_212._col1(Inner),Output:["_col0","_col2","_col3"] - <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + Merge Join Operator [MERGEJOIN_183] (rows=4827746 width=227) + Conds:RS_208._col1=RS_214._col1(Inner),Output:["_col0","_col2","_col3"] + <-Reducer 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] PartitionCols:_col1 - Select Operator [SEL_211] (rows=12 width=198) + Select Operator [SEL_213] (rows=12 width=198) Output:["_col0","_col1"] - Filter Operator [FIL_210] (rows=12 width=206) + Filter Operator [FIL_212] (rows=12 width=206) predicate:CAST( (_col1 / _col2) AS decimal(21,6)) is not null - Group By Operator [GBY_209] (rows=12 width=206) + Group By Operator [GBY_211] (rows=12 width=206) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 - Select Operator [SEL_208] (rows=5266632 width=201) + Select Operator [SEL_210] (rows=5266632 width=201) Output:["_col0","_col2"] - Group By Operator [GBY_207] (rows=5266632 width=201) + Group By Operator [GBY_209] (rows=5266632 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_45] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=8749496 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_180] (rows=8749496 width=194) - Conds:RS_40._col2=RS_202._col0(Inner),Output:["_col1","_col3","_col6"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] + Group By Operator [GBY_46] (rows=8749496 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_182] (rows=8749496 width=194) + Conds:RS_194._col0=RS_43._col2(Inner),Output:["_col1","_col3","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_200] (rows=40000000 width=90) + Select Operator [SEL_192] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_199] (rows=40000000 width=90) + Filter Operator [FIL_191] (rows=40000000 width=90) predicate:(ca_address_sk is not null and ca_state is not null) - TableScan [TS_12] (rows=40000000 width=90) + TableScan [TS_6] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_40] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_179] (rows=8749496 width=112) - Conds:RS_194._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_181] (rows=8749496 width=112) + Conds:RS_200._col0=RS_204._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_196] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_195] (rows=652 width=8) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] - PartitionCols:_col0 - Select Operator [SEL_192] (rows=28221532 width=121) + Select Operator [SEL_198] (rows=28221532 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_190] (rows=28221532 width=121) + Filter Operator [FIL_196] (rows=28221532 width=121) predicate:(cr_returning_addr_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_6] (rows=28798881 width=121) + TableScan [TS_9] (rows=28798881 width=121) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_returning_addr_sk","cr_return_amt_inc_tax"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] + PartitionCols:_col0 + Select Operator [SEL_202] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_201] (rows=652 width=8) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_208] PartitionCols:_col1 - Filter Operator [FIL_205] (rows=4827746 width=201) + Filter Operator [FIL_207] (rows=4827746 width=201) predicate:_col2 is not null - Select Operator [SEL_204] (rows=4827746 width=201) + Select Operator [SEL_206] (rows=4827746 width=201) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_203] (rows=4827746 width=201) + Group By Operator [GBY_205] (rows=4827746 width=201) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_23] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_24] PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=8574602 width=201) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col6, _col1 - Merge Join Operator [MERGEJOIN_178] (rows=8574602 width=194) - Conds:RS_18._col2=RS_201._col0(Inner),Output:["_col1","_col3","_col6"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Group By Operator [GBY_23] (rows=8574602 width=201) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col5)"],keys:_col1, _col3 + Merge Join Operator [MERGEJOIN_180] (rows=8574602 width=194) + Conds:RS_193._col0=RS_20._col2(Inner),Output:["_col1","_col3","_col5"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_193] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_200] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_18] + Please refer to the previous Select Operator [SEL_192] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_177] (rows=8574602 width=112) - Conds:RS_193._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_197] + Merge Join Operator [MERGEJOIN_179] (rows=8574602 width=112) + Conds:RS_199._col0=RS_203._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_196] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] - PartitionCols:_col0 - Select Operator [SEL_191] (rows=27657410 width=121) + Select Operator [SEL_197] (rows=27657410 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_189] (rows=27657410 width=121) + Filter Operator [FIL_195] (rows=27657410 width=121) predicate:(cr_returning_addr_sk is not null and cr_returning_customer_sk is not null and cr_returned_date_sk is not null) - Please refer to the previous TableScan [TS_6] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_176] (rows=1568628 width=1310) - Conds:RS_185._col2=RS_188._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] - PartitionCols:_col2 - Select Operator [SEL_184] (rows=80000000 width=375) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_183] (rows=80000000 width=375) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_0] (rows=80000000 width=375) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_current_addr_sk","c_salutation","c_first_name","c_last_name"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] - PartitionCols:_col0 - Select Operator [SEL_187] (rows=784314 width=941) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_186] (rows=784314 width=1027) - predicate:((ca_state = 'IL') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1027) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_street_type","ca_suite_number","ca_city","ca_county","ca_state","ca_zip","ca_country","ca_gmt_offset","ca_location_type"] + Please refer to the previous TableScan [TS_9] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_202] diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index b05e829033..7c0f59e86f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -184,12 +184,12 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 16 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 11 <- Map 16 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 9 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) @@ -200,138 +200,136 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_239] - Limit [LIM_238] (rows=72 width=656) + File Output Operator [FS_240] + Limit [LIM_239] (rows=72 width=656) Number of rows:100 - Select Operator [SEL_237] (rows=72 width=656) + Select Operator [SEL_238] (rows=72 width=656) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] - Select Operator [SEL_235] (rows=72 width=656) + SHUFFLE [RS_237] + Select Operator [SEL_236] (rows=72 width=656) Output:["_col4","_col5","_col6","_col7"] - Top N Key Operator [TNK_234] (rows=72 width=353) + Top N Key Operator [TNK_235] (rows=72 width=353) keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), CAST( (_col3 / _col4) AS decimal(11,6)), CAST( (_col5 / _col6) AS decimal(11,6)),top n:100 - Group By Operator [GBY_233] (rows=72 width=353) + Group By Operator [GBY_234] (rows=72 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_50] + SHUFFLE [RS_51] PartitionCols:_col0 - Group By Operator [GBY_49] (rows=288 width=353) + Group By Operator [GBY_50] (rows=288 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","count(_col4)","sum(_col28)","count(_col28)","sum(_col27)","count(_col27)"],keys:_col37 - Merge Join Operator [MERGEJOIN_204] (rows=2912400 width=313) - Conds:RS_45._col2=RS_232._col0(Inner),Output:["_col4","_col27","_col28","_col37"] + Merge Join Operator [MERGEJOIN_205] (rows=2912400 width=313) + Conds:RS_46._col2=RS_233._col0(Inner),Output:["_col4","_col27","_col28","_col37"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] + SHUFFLE [RS_233] PartitionCols:_col0 - Select Operator [SEL_231] (rows=4602 width=4) + Select Operator [SEL_232] (rows=4602 width=4) Output:["_col0"] - Filter Operator [FIL_230] (rows=4602 width=4) + Filter Operator [FIL_231] (rows=4602 width=4) predicate:wp_web_page_sk is not null - TableScan [TS_34] (rows=4602 width=4) + TableScan [TS_35] (rows=4602 width=4) default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_46] PartitionCols:_col2 - Filter Operator [FIL_43] (rows=2912400 width=377) + Filter Operator [FIL_44] (rows=2912400 width=377) predicate:(((_col30 and _col5) or (_col31 and _col6) or (_col32 and _col7)) and ((_col15 and _col16 and _col8) or (_col17 and _col18 and _col9) or (_col19 and _col20 and _col10))) - Merge Join Operator [MERGEJOIN_203] (rows=10355208 width=377) - Conds:RS_40._col1, _col3=RS_41._col9, _col14(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col15","_col16","_col17","_col18","_col19","_col20","_col27","_col28","_col30","_col31","_col32","_col37"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_41] + Merge Join Operator [MERGEJOIN_204] (rows=10355208 width=377) + Conds:RS_41._col1, _col3=RS_42._col9, _col14(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col15","_col16","_col17","_col18","_col19","_col20","_col27","_col28","_col30","_col31","_col32","_col37"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_42] PartitionCols:_col9, _col14 - Select Operator [SEL_33] (rows=1056644 width=250) - Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col14","_col15","_col16","_col18","_col19","_col20","_col25"] - Merge Join Operator [MERGEJOIN_202] (rows=1056644 width=250) - Conds:RS_30._col4=RS_229._col0(Inner),Output:["_col0","_col5","_col6","_col7","_col9","_col10","_col11","_col18","_col19","_col20","_col21","_col22","_col23","_col25"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] - PartitionCols:_col0 - Select Operator [SEL_228] (rows=72 width=101) - Output:["_col0","_col1"] - Filter Operator [FIL_227] (rows=72 width=101) - predicate:r_reason_sk is not null - TableScan [TS_18] (rows=72 width=101) - default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_201] (rows=1056644 width=155) - Conds:RS_27._col1, _col13, _col14=RS_225._col0, _col1, _col2(Inner),Output:["_col0","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col18","_col19","_col20","_col21","_col22","_col23"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_223] (rows=265971 width=207) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_222] (rows=265971 width=183) - predicate:((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=183) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col1, _col13, _col14 - Merge Join Operator [MERGEJOIN_200] (rows=1056644 width=312) - Conds:RS_24._col3=RS_226._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col13","_col14"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - PartitionCols:_col0 - Select Operator [SEL_224] (rows=265971 width=183) - Output:["_col0","_col1","_col2"] - Please refer to the previous Filter Operator [FIL_222] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_199] (rows=1056644 width=135) - Conds:RS_218._col2=RS_221._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] - PartitionCols:_col0 - Select Operator [SEL_220] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_219] (rows=3529412 width=187) - predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) - TableScan [TS_9] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_218] - PartitionCols:_col2 - Select Operator [SEL_217] (rows=11975292 width=237) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_216] (rows=11975292 width=237) - predicate:(wr_refunded_addr_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_cdemo_sk is not null and wr_reason_sk is not null and wr_item_sk is not null and wr_order_number is not null) - TableScan [TS_6] (rows=14398467 width=237) - default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + Merge Join Operator [MERGEJOIN_203] (rows=1056644 width=250) + Conds:RS_31._col13=RS_230._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col14","_col15","_col16","_col18","_col19","_col20","_col25"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + PartitionCols:_col0 + Select Operator [SEL_229] (rows=72 width=101) + Output:["_col0","_col1"] + Filter Operator [FIL_228] (rows=72 width=101) + predicate:r_reason_sk is not null + TableScan [TS_25] (rows=72 width=101) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_202] (rows=1056644 width=155) + Conds:RS_220._col0, _col1, _col2=RS_29._col1, _col13, _col14(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col15","_col16","_col18","_col19","_col20"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_218] (rows=265971 width=207) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_217] (rows=265971 width=183) + predicate:((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) + TableScan [TS_6] (rows=1861800 width=183) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col13, _col14 + Merge Join Operator [MERGEJOIN_201] (rows=1056644 width=312) + Conds:RS_21._col3=RS_221._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col13","_col14"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_221] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=265971 width=183) + Output:["_col0","_col1","_col2"] + Please refer to the previous Filter Operator [FIL_217] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_200] (rows=1056644 width=135) + Conds:RS_224._col2=RS_227._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col2 + Select Operator [SEL_223] (rows=11975292 width=237) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_222] (rows=11975292 width=237) + predicate:(wr_refunded_addr_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_cdemo_sk is not null and wr_reason_sk is not null and wr_item_sk is not null and wr_order_number is not null) + TableScan [TS_9] (rows=14398467 width=237) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] + PartitionCols:_col0 + Select Operator [SEL_226] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_225] (rows=3529412 width=187) + predicate:((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] + SHUFFLE [RS_41] PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_198] (rows=51392014 width=39) - Conds:RS_215._col0=RS_207._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_199] (rows=51392014 width=39) + Conds:RS_216._col0=RS_208._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_207] + PARTITION_ONLY_SHUFFLE [RS_208] PartitionCols:_col0 - Select Operator [SEL_206] (rows=652 width=4) + Select Operator [SEL_207] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_205] (rows=652 width=8) + Filter Operator [FIL_206] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + SHUFFLE [RS_216] PartitionCols:_col0 - Select Operator [SEL_214] (rows=143931136 width=43) + Select Operator [SEL_215] (rows=143931136 width=43) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_213] (rows=143931136 width=243) - predicate:(((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250)) and ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) + Filter Operator [FIL_214] (rows=143931136 width=243) + predicate:(((ws_sales_price >= 100) or (ws_sales_price <= 150) or (ws_sales_price >= 50) or (ws_sales_price <= 100) or (ws_sales_price >= 150) or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or (ws_net_profit >= 150) or (ws_net_profit <= 300) or (ws_net_profit >= 50) or (ws_net_profit <= 250)) and ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=243) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_212] - Group By Operator [GBY_211] (rows=1 width=12) + BROADCAST [RS_213] + Group By Operator [GBY_212] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_210] - Group By Operator [GBY_209] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_208] (rows=652 width=4) + Select Operator [SEL_209] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_206] + Please refer to the previous Select Operator [SEL_207] diff --git a/ql/src/test/results/clientpositive/perf/tez/query86.q.out b/ql/src/test/results/clientpositive/perf/tez/query86.q.out index adfb3cb48d..d946dc7a3b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query86.q.out @@ -59,91 +59,91 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_86] - Limit [LIM_85] (rows=100 width=490) + Reducer 5 vectorized + File Output Operator [FS_87] + Limit [LIM_86] (rows=100 width=490) Number of rows:100 - Select Operator [SEL_84] (rows=3060 width=490) + Select Operator [SEL_85] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_83] - Select Operator [SEL_82] (rows=3060 width=490) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + Select Operator [SEL_83] (rows=3060 width=490) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Top N Key Operator [TNK_81] (rows=3060 width=302) + Top N Key Operator [TNK_82] (rows=3060 width=302) keys:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN (((grouping(_col3, 1L) + grouping(_col3, 0L)) = 0L)) THEN (_col0) ELSE (null) END, rank_window_0,top n:100 - PTF Operator [PTF_80] (rows=3060 width=302) + PTF Operator [PTF_81] (rows=3060 width=302) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_79] (rows=3060 width=302) + Select Operator [SEL_80] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_78] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_79] PartitionCols:(grouping(_col3, 1L) + grouping(_col3, 0L)), CASE WHEN ((grouping(_col3, 0L) = UDFToLong(0))) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_77] (rows=3060 width=302) + Select Operator [SEL_78] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_76] (rows=3060 width=302) + Group By Operator [GBY_77] (rows=3060 width=302) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=88740 width=302) + Group By Operator [GBY_18] (rows=88740 width=302) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0L - Select Operator [SEL_15] (rows=24992810 width=293) + Select Operator [SEL_16] (rows=24992810 width=293) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_61] (rows=24992810 width=293) - Conds:RS_12._col1=RS_75._col0(Inner),Output:["_col2","_col5","_col6"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_75] + Merge Join Operator [MERGEJOIN_62] (rows=24992810 width=293) + Conds:RS_65._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_65] PartitionCols:_col0 - Select Operator [SEL_74] (rows=462000 width=186) + Select Operator [SEL_64] (rows=462000 width=186) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_73] (rows=462000 width=186) + Filter Operator [FIL_63] (rows=462000 width=186) predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=186) + TableScan [TS_0] (rows=462000 width=186) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_60] (rows=24992810 width=115) - Conds:RS_72._col0=RS_64._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_64] + Merge Join Operator [MERGEJOIN_61] (rows=24992810 width=115) + Conds:RS_76._col0=RS_68._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_68] PartitionCols:_col0 - Select Operator [SEL_63] (rows=317 width=4) + Select Operator [SEL_67] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_62] (rows=317 width=8) + Filter Operator [FIL_66] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=8) + TableScan [TS_6] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_72] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_76] PartitionCols:_col0 - Select Operator [SEL_71] (rows=143966864 width=119) + Select Operator [SEL_75] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_70] (rows=143966864 width=119) + Filter Operator [FIL_74] (rows=143966864 width=119) predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_d1_d_date_sk_min) AND DynamicValue(RS_10_d1_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_d1_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=119) + TableScan [TS_3] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_net_paid"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_69] - Group By Operator [GBY_68] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_73] + Group By Operator [GBY_72] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_67] - Group By Operator [GBY_66] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_71] + Group By Operator [GBY_70] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_65] (rows=317 width=4) + Select Operator [SEL_69] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_63] + Please refer to the previous Select Operator [SEL_67] diff --git a/ql/src/test/results/clientpositive/perf/tez/query87.q.out b/ql/src/test/results/clientpositive/perf/tez/query87.q.out index 529b7bdc53..a1eb8ae774 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query87.q.out @@ -55,236 +55,236 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Map 21 <- Reducer 15 (BROADCAST_EDGE) -Map 22 <- Reducer 19 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) -Reducer 17 <- Map 20 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 20 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Map 13 <- Reducer 16 (BROADCAST_EDGE) +Map 21 <- Reducer 18 (BROADCAST_EDGE) +Map 22 <- Reducer 20 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 15 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 20 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_272] - Group By Operator [GBY_271] (rows=1 width=8) + Reducer 8 vectorized + File Output Operator [FS_275] + Group By Operator [GBY_274] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_270] - Group By Operator [GBY_269] (rows=1 width=8) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_273] + Group By Operator [GBY_272] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_268] (rows=8062883 width=16) - Filter Operator [FIL_267] (rows=8062883 width=16) + Select Operator [SEL_271] (rows=8062883 width=16) + Filter Operator [FIL_270] (rows=8062883 width=16) predicate:((_col3 > 0L) and ((_col3 * 2L) = _col4)) - Select Operator [SEL_266] (rows=48377300 width=16) + Select Operator [SEL_269] (rows=48377300 width=16) Output:["_col3","_col4"] - Group By Operator [GBY_265] (rows=48377300 width=290) + Group By Operator [GBY_268] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_296] + <-Union 6 [SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] vectorized + Reduce Output Operator [RS_299] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_295] (rows=48377300 width=290) + Group By Operator [GBY_298] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_294] (rows=48377300 width=290) + Select Operator [SEL_297] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_293] (rows=24986582 width=290) + Select Operator [SEL_296] (rows=24986582 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_292] (rows=24986582 width=282) + Group By Operator [GBY_295] (rows=24986582 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_291] (rows=24986582 width=274) + Select Operator [SEL_294] (rows=24986582 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_290] (rows=24986582 width=274) + Group By Operator [GBY_293] (rows=24986582 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_80] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_83] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_79] (rows=24986582 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_192] (rows=24986582 width=274) - Conds:RS_75._col1=RS_249._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_249] + Group By Operator [GBY_82] (rows=24986582 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_195] (rows=24986582 width=274) + Conds:RS_233._col0=RS_79._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] PartitionCols:_col0 - Select Operator [SEL_246] (rows=80000000 width=184) + Select Operator [SEL_230] (rows=80000000 width=184) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_245] (rows=80000000 width=184) + Filter Operator [FIL_229] (rows=80000000 width=184) predicate:c_customer_sk is not null - TableScan [TS_6] (rows=80000000 width=184) + TableScan [TS_0] (rows=80000000 width=184) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_75] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_79] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_191] (rows=24986582 width=97) - Conds:RS_289._col0=RS_232._col0(Inner),Output:["_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] + Merge Join Operator [MERGEJOIN_194] (rows=24986582 width=97) + Conds:RS_292._col0=RS_240._col0(Inner),Output:["_col1","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_240] PartitionCols:_col0 - Select Operator [SEL_227] (rows=317 width=98) + Select Operator [SEL_235] (rows=317 width=98) Output:["_col0","_col1"] - Filter Operator [FIL_226] (rows=317 width=102) + Filter Operator [FIL_234] (rows=317 width=102) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=102) + TableScan [TS_6] (rows=73049 width=102) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_month_seq"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_292] PartitionCols:_col0 - Select Operator [SEL_288] (rows=143930993 width=7) + Select Operator [SEL_291] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_287] (rows=143930993 width=7) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_63] (rows=144002668 width=7) + Filter Operator [FIL_290] (rows=143930993 width=7) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_68] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_286] - Group By Operator [GBY_285] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] - Group By Operator [GBY_236] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_247] + Group By Operator [GBY_244] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_233] (rows=317 width=4) + Select Operator [SEL_241] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_227] - <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_264] + Please refer to the previous Select Operator [SEL_235] + <-Reducer 5 [CONTAINS] vectorized + Reduce Output Operator [RS_267] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_263] (rows=48377300 width=290) + Group By Operator [GBY_266] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_262] (rows=48377300 width=290) + Select Operator [SEL_265] (rows=48377300 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_261] (rows=23390718 width=290) + Select Operator [SEL_264] (rows=23390718 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_260] (rows=23390718 width=282) + Group By Operator [GBY_263] (rows=23390718 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_259] (rows=23390718 width=290) + Select Operator [SEL_262] (rows=23390718 width=290) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_258] (rows=23390718 width=290) + Filter Operator [FIL_261] (rows=23390718 width=290) predicate:((_col3 > 0L) and ((_col3 * 2L) = _col4)) - Group By Operator [GBY_257] (rows=140344308 width=290) + Group By Operator [GBY_260] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_284] + <-Union 4 [SIMPLE_EDGE] + <-Reducer 10 [CONTAINS] vectorized + Reduce Output Operator [RS_287] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_283] (rows=140344308 width=290) + Group By Operator [GBY_286] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_282] (rows=140344308 width=290) + Select Operator [SEL_285] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_281] (rows=49146883 width=290) + Select Operator [SEL_284] (rows=49146883 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_280] (rows=49146883 width=282) + Group By Operator [GBY_283] (rows=49146883 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_279] (rows=49146883 width=274) + Select Operator [SEL_282] (rows=49146883 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_278] (rows=49146883 width=274) + Group By Operator [GBY_281] (rows=49146883 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_42] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_41] (rows=49146883 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_190] (rows=49146883 width=274) - Conds:RS_37._col1=RS_248._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + Group By Operator [GBY_43] (rows=49146883 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_193] (rows=49146883 width=274) + Conds:RS_232._col0=RS_40._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_246] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_37] + Please refer to the previous Select Operator [SEL_230] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_189] (rows=49146883 width=97) - Conds:RS_277._col0=RS_230._col0(Inner),Output:["_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + Merge Join Operator [MERGEJOIN_192] (rows=49146883 width=97) + Conds:RS_280._col0=RS_238._col0(Inner),Output:["_col1","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_238] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_227] + Please refer to the previous Select Operator [SEL_235] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_277] + SHUFFLE [RS_280] PartitionCols:_col0 - Select Operator [SEL_276] (rows=285117831 width=7) + Select Operator [SEL_279] (rows=285117831 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_275] (rows=285117831 width=7) - predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_25] (rows=287989836 width=7) + Filter Operator [FIL_278] (rows=285117831 width=7) + predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_36_date_dim_d_date_sk_min) AND DynamicValue(RS_36_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_36_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_29] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk"] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_274] - Group By Operator [GBY_273] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_277] + Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - Group By Operator [GBY_235] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_246] + Group By Operator [GBY_243] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_231] (rows=317 width=4) + Select Operator [SEL_239] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_227] - <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_256] + Please refer to the previous Select Operator [SEL_235] + <-Reducer 3 [CONTAINS] vectorized + Reduce Output Operator [RS_259] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_255] (rows=140344308 width=290) + Group By Operator [GBY_258] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","sum(_col4)"],keys:_col0, _col1, _col2 - Select Operator [SEL_254] (rows=140344308 width=290) + Select Operator [SEL_257] (rows=140344308 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Select Operator [SEL_253] (rows=91197425 width=290) + Select Operator [SEL_256] (rows=91197425 width=290) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_252] (rows=91197425 width=282) + Group By Operator [GBY_255] (rows=91197425 width=282) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 - Select Operator [SEL_251] (rows=91197425 width=274) + Select Operator [SEL_254] (rows=91197425 width=274) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_250] (rows=91197425 width=274) + Group By Operator [GBY_253] (rows=91197425 width=274) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=91197425 width=274) - Output:["_col0","_col1","_col2"],keys:_col6, _col5, _col3 - Merge Join Operator [MERGEJOIN_188] (rows=91197425 width=274) - Conds:RS_12._col1=RS_247._col0(Inner),Output:["_col3","_col5","_col6"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] + Group By Operator [GBY_17] (rows=91197425 width=274) + Output:["_col0","_col1","_col2"],keys:_col2, _col1, _col6 + Merge Join Operator [MERGEJOIN_191] (rows=91197425 width=274) + Conds:RS_231._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_246] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Please refer to the previous Select Operator [SEL_230] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_187] (rows=91197425 width=96) - Conds:RS_244._col0=RS_228._col0(Inner),Output:["_col1","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] + Merge Join Operator [MERGEJOIN_190] (rows=91197425 width=96) + Conds:RS_252._col0=RS_236._col0(Inner),Output:["_col1","_col3"] + <-Map 15 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_236] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_227] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] + Please refer to the previous Select Operator [SEL_235] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_252] PartitionCols:_col0 - Select Operator [SEL_243] (rows=525327388 width=7) + Select Operator [SEL_251] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_242] (rows=525327388 width=7) + Filter Operator [FIL_250] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=7) + TableScan [TS_3] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_241] - Group By Operator [GBY_240] (rows=1 width=12) + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_249] + Group By Operator [GBY_248] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] - Group By Operator [GBY_234] (rows=1 width=12) + <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_245] + Group By Operator [GBY_242] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_229] (rows=317 width=4) + Select Operator [SEL_237] (rows=317 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_227] + Please refer to the previous Select Operator [SEL_235] diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out index de09f38d91..423ae8e846 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -79,120 +79,120 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_170] - Select Operator [SEL_169] (rows=2376 width=406) + Reducer 6 vectorized + File Output Operator [FS_171] + Select Operator [SEL_170] (rows=2376 width=406) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_168] - Select Operator [SEL_167] (rows=2376 width=518) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_169] + Select Operator [SEL_168] (rows=2376 width=518) Output:["_col0","_col1","_col2","_col4"] - Group By Operator [GBY_166] (rows=2376 width=585) + Group By Operator [GBY_167] (rows=2376 width=585) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_42] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_41] (rows=2376 width=585) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col5, _col6, _col14, _col15, _col16 - Merge Join Operator [MERGEJOIN_144] (rows=231957 width=473) - Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col11","_col14","_col15","_col16"] + Group By Operator [GBY_42] (rows=2376 width=585) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col6, _col7, _col14, _col15, _col16 + Merge Join Operator [MERGEJOIN_145] (rows=231957 width=473) + Conds:RS_38._col3=RS_166._col0(Inner),Output:["_col6","_col7","_col11","_col14","_col15","_col16"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] + SHUFFLE [RS_166] PartitionCols:_col0 - Select Operator [SEL_164] (rows=3600 width=4) + Select Operator [SEL_165] (rows=3600 width=4) Output:["_col0"] - Filter Operator [FIL_163] (rows=3600 width=96) + Filter Operator [FIL_164] (rows=3600 width=96) predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) - TableScan [TS_25] (rows=7200 width=96) + TableScan [TS_29] (rows=7200 width=96) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_143] (rows=463913 width=475) - Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col11","_col14","_col15","_col16"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_144] (rows=463913 width=475) + Conds:RS_35._col1=RS_36._col1(Inner),Output:["_col3","_col6","_col7","_col11","_col14","_col15","_col16"] <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_35] + SHUFFLE [RS_36] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_142] (rows=657590 width=312) - Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_143] (rows=657590 width=312) + Conds:RS_25._col2=RS_163._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_163] PartitionCols:_col0 - Select Operator [SEL_161] (rows=60 width=298) + Select Operator [SEL_162] (rows=60 width=298) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_160] (rows=60 width=298) + Filter Operator [FIL_161] (rows=60 width=298) predicate:cc_call_center_sk is not null - TableScan [TS_15] (rows=60 width=298) + TableScan [TS_19] (rows=60 width=298) default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_21] + SHUFFLE [RS_25] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_141] (rows=657590 width=19) - Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_142] (rows=657590 width=19) + Conds:RS_157._col0=RS_160._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_155] (rows=27658583 width=121) + Select Operator [SEL_156] (rows=27658583 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_154] (rows=27658583 width=121) + Filter Operator [FIL_155] (rows=27658583 width=121) predicate:(cr_call_center_sk is not null and cr_returning_customer_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_9] (rows=28798881 width=121) + TableScan [TS_13] (rows=28798881 width=121) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_160] PartitionCols:_col0 - Select Operator [SEL_158] (rows=50 width=4) + Select Operator [SEL_159] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_157] (rows=50 width=12) + Filter Operator [FIL_158] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=12) + TableScan [TS_16] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_140] (rows=213205 width=183) - Conds:RS_31._col3=RS_153._col0(Inner),Output:["_col0","_col2","_col5","_col6"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_141] (rows=213205 width=183) + Conds:RS_148._col0=RS_33._col3(Inner),Output:["_col1","_col3","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_152] (rows=8000000 width=4) + Select Operator [SEL_147] (rows=8000000 width=4) Output:["_col0"] - Filter Operator [FIL_151] (rows=8000000 width=112) + Filter Operator [FIL_146] (rows=8000000 width=112) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=112) + TableScan [TS_0] (rows=40000000 width=112) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_31] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_139] (rows=1066023 width=187) - Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + Merge Join Operator [MERGEJOIN_140] (rows=1066023 width=187) + Conds:RS_151._col1=RS_154._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] PartitionCols:_col1 - Select Operator [SEL_146] (rows=74500295 width=15) + Select Operator [SEL_150] (rows=74500295 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_145] (rows=74500295 width=15) + Filter Operator [FIL_149] (rows=74500295 width=15) predicate:(c_current_hdemo_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null and c_current_addr_sk is not null) - TableScan [TS_0] (rows=80000000 width=15) + TableScan [TS_3] (rows=80000000 width=15) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_149] (rows=26269 width=183) + Select Operator [SEL_153] (rows=26269 width=183) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_148] (rows=26269 width=183) + Filter Operator [FIL_152] (rows=26269 width=183) predicate:((cd_marital_status) IN ('M', 'W') and (cd_education_status) IN ('Unknown', 'Advanced Degree') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=183) + TableScan [TS_6] (rows=1861800 width=183) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query93.q.out b/ql/src/test/results/clientpositive/perf/tez/query93.q.out index f4cbf00ef0..9e3bc2f592 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query93.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query93.q.out @@ -43,79 +43,79 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 8 <- Reducer 6 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 vectorized - File Output Operator [FS_82] - Limit [LIM_81] (rows=100 width=112) + Reducer 4 vectorized + File Output Operator [FS_83] + Limit [LIM_82] (rows=100 width=112) Number of rows:100 - Select Operator [SEL_80] (rows=38308 width=108) + Select Operator [SEL_81] (rows=38308 width=108) Output:["_col0","_col1"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_79] - Top N Key Operator [TNK_78] (rows=38308 width=112) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_80] + Top N Key Operator [TNK_79] (rows=38308 width=112) keys:_col1, _col0,top n:100 - Group By Operator [GBY_77] (rows=38308 width=112) + Group By Operator [GBY_78] (rows=38308 width=112) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=306464 width=112) + Group By Operator [GBY_18] (rows=306464 width=112) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_15] (rows=15586502 width=119) + Select Operator [SEL_16] (rows=15586502 width=119) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_65] (rows=15586502 width=119) - Conds:RS_12._col0, _col2=RS_76._col0, _col2(Inner),Output:["_col3","_col4","_col7","_col9","_col10","_col11"] - <-Reducer 2 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_12] + Merge Join Operator [MERGEJOIN_66] (rows=15586502 width=119) + Conds:RS_77._col0, _col2=RS_14._col0, _col2(Inner),Output:["_col1","_col3","_col4","_col5","_col9","_col10"] + <-Reducer 6 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_14] PartitionCols:_col0, _col2 - Merge Join Operator [MERGEJOIN_64] (rows=1522298 width=12) - Conds:RS_68._col1=RS_71._col0(Inner),Output:["_col0","_col2","_col3","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_68] + Merge Join Operator [MERGEJOIN_65] (rows=1522298 width=12) + Conds:RS_69._col1=RS_72._col0(Inner),Output:["_col0","_col2","_col3","_col4"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_69] PartitionCols:_col1 - Select Operator [SEL_67] (rows=55574563 width=19) + Select Operator [SEL_68] (rows=55574563 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_66] (rows=55574563 width=15) + Filter Operator [FIL_67] (rows=55574563 width=15) predicate:(sr_reason_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_0] (rows=57591150 width=15) + TableScan [TS_3] (rows=57591150 width=15) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_reason_sk","sr_ticket_number","sr_return_quantity"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_70] (rows=1 width=4) + Select Operator [SEL_71] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_69] (rows=1 width=101) + Filter Operator [FIL_70] (rows=1 width=101) predicate:((r_reason_desc = 'Did not like the warranty') and r_reason_sk is not null) - TableScan [TS_3] (rows=72 width=101) + TableScan [TS_6] (rows=72 width=101) default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_76] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_77] PartitionCols:_col0, _col2 - Select Operator [SEL_75] (rows=575995635 width=234) + Select Operator [SEL_76] (rows=575995635 width=234) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_74] (rows=575995635 width=122) - predicate:(ss_item_sk is not null and ss_ticket_number is not null and ss_ticket_number BETWEEN DynamicValue(RS_12_store_returns_sr_ticket_number_min) AND DynamicValue(RS_12_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_12_store_returns_sr_ticket_number_bloom_filter))) - TableScan [TS_6] (rows=575995635 width=122) + Filter Operator [FIL_75] (rows=575995635 width=122) + predicate:(ss_item_sk is not null and ss_ticket_number is not null and ss_ticket_number BETWEEN DynamicValue(RS_14_store_returns_sr_ticket_number_min) AND DynamicValue(RS_14_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_14_store_returns_sr_ticket_number_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=122) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_sales_price"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST [RS_73] - Group By Operator [GBY_72] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_74] + Group By Operator [GBY_73] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_61] - Group By Operator [GBY_60] (rows=1 width=12) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_42] + Group By Operator [GBY_41] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_59] (rows=1522298 width=8) + Select Operator [SEL_40] (rows=1522298 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_64] + Please refer to the previous Merge Join Operator [MERGEJOIN_65] diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out index 6a6d53e5bf..4be206ce5f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out @@ -69,149 +69,151 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE) -Map 14 <- Reducer 9 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Map 10 <- Reducer 9 (BROADCAST_EDGE) +Map 14 <- Reducer 8 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_160] - Group By Operator [GBY_159] (rows=1 width=232) + Reducer 7 vectorized + File Output Operator [FS_161] + Group By Operator [GBY_160] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] - Group By Operator [GBY_157] (rows=1 width=232) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_159] + Group By Operator [GBY_158] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_156] (rows=5022875 width=228) + Group By Operator [GBY_157] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_70] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_69] (rows=5022875 width=228) + Group By Operator [GBY_70] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_42] (rows=5022875 width=229) + Select Operator [SEL_43] (rows=5022875 width=229) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_41] (rows=5022875 width=229) + Filter Operator [FIL_42] (rows=5022875 width=229) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_126] (rows=10045750 width=229) - Conds:RS_38._col4=RS_155._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] + Merge Join Operator [MERGEJOIN_127] (rows=10045750 width=229) + Conds:RS_39._col4=RS_156._col1(Left Outer),Output:["_col4","_col5","_col6","_col13"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_155] + SHUFFLE [RS_156] PartitionCols:_col1 - Select Operator [SEL_154] (rows=8007986 width=8) + Select Operator [SEL_155] (rows=8007986 width=8) Output:["_col0","_col1"] - Group By Operator [GBY_153] (rows=8007986 width=4) + Group By Operator [GBY_154] (rows=8007986 width=4) Output:["_col0"],keys:KEY._col0 <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_153] PartitionCols:_col0 - Group By Operator [GBY_151] (rows=14398467 width=4) + Group By Operator [GBY_152] (rows=14398467 width=4) Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_150] (rows=14398467 width=4) + Filter Operator [FIL_151] (rows=14398467 width=4) predicate:wr_order_number is not null - TableScan [TS_25] (rows=14398467 width=4) + TableScan [TS_26] (rows=14398467 width=4) default@web_returns,wr1,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_38] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_39] PartitionCols:_col4 - Select Operator [SEL_37] (rows=5022875 width=231) + Select Operator [SEL_38] (rows=5022875 width=231) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_125] (rows=5022875 width=235) - Conds:RS_34._col4=RS_149._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} - <-Reducer 4 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_34] + Merge Join Operator [MERGEJOIN_126] (rows=5022875 width=235) + Conds:RS_35._col4=RS_150._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Reducer 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_35] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_124] (rows=5022875 width=231) - Conds:RS_18._col2=RS_143._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - PartitionCols:_col0 - Select Operator [SEL_142] (rows=12 width=91) - Output:["_col0"] - Filter Operator [FIL_141] (rows=12 width=92) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=92) - default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_123] (rows=15673790 width=235) - Conds:RS_15._col1=RS_129._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] - PartitionCols:_col0 - Select Operator [SEL_128] (rows=784314 width=90) - Output:["_col0"] - Filter Operator [FIL_127] (rows=784314 width=90) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=90) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_122] (rows=15987241 width=239) - Conds:RS_137._col0=RS_140._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] - PartitionCols:_col0 - Select Operator [SEL_136] (rows=143895019 width=243) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_135] (rows=143895019 width=243) - predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_order_number is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=243) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=784314 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_128] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] - PartitionCols:_col0 - Select Operator [SEL_139] (rows=8116 width=98) - Output:["_col0"] - Filter Operator [FIL_138] (rows=8116 width=98) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + Select Operator [SEL_22] (rows=5022875 width=231) + Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_125] (rows=5022875 width=231) + Conds:RS_19._col4=RS_144._col0(Inner),Output:["_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=12 width=91) + Output:["_col0"] + Filter Operator [FIL_142] (rows=12 width=92) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_13] (rows=84 width=92) + default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_124] (rows=15673790 width=235) + Conds:RS_130._col0=RS_17._col1(Inner),Output:["_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_130] + PartitionCols:_col0 + Select Operator [SEL_129] (rows=784314 width=90) + Output:["_col0"] + Filter Operator [FIL_128] (rows=784314 width=90) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_0] (rows=40000000 width=90) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_123] (rows=15987241 width=239) + Conds:RS_138._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] + PartitionCols:_col0 + Select Operator [SEL_137] (rows=143895019 width=243) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_136] (rows=143895019 width=243) + predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_order_number is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) + TableScan [TS_3] (rows=144002668 width=243) + default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_133] + Group By Operator [GBY_132] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_131] (rows=784314 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_129] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] + PartitionCols:_col0 + Select Operator [SEL_140] (rows=8116 width=98) + Output:["_col0"] + Filter Operator [FIL_139] (rows=8116 width=98) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=98) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_150] PartitionCols:_col0 - Group By Operator [GBY_148] (rows=143966743 width=7) + Group By Operator [GBY_149] (rows=143966743 width=7) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_147] (rows=143966743 width=7) + Select Operator [SEL_148] (rows=143966743 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=143966743 width=7) - predicate:(ws_warehouse_sk is not null and ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_34_ws1_ws_order_number_min) AND DynamicValue(RS_34_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_34_ws1_ws_order_number_bloom_filter))) - TableScan [TS_22] (rows=144002668 width=7) + Filter Operator [FIL_147] (rows=143966743 width=7) + predicate:(ws_warehouse_sk is not null and ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_35_ws1_ws_order_number_min) AND DynamicValue(RS_35_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_35_ws1_ws_order_number_bloom_filter))) + TableScan [TS_23] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=5022875 width=8) + Select Operator [SEL_111] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_124] + Please refer to the previous Select Operator [SEL_22] diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out index f15afbed4b..a3063d0403 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out @@ -75,223 +75,223 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 15 <- Reducer 10 (BROADCAST_EDGE) -Map 18 <- Reducer 10 (BROADCAST_EDGE) -Map 19 <- Reducer 9 (BROADCAST_EDGE) -Map 23 <- Reducer 9 (BROADCAST_EDGE) -Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 21 <- Map 24 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 22 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE) +Map 10 <- Reducer 19 (BROADCAST_EDGE) +Map 15 <- Reducer 19 (BROADCAST_EDGE) +Map 21 <- Reducer 20 (BROADCAST_EDGE) +Map 8 <- Reducer 14 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 vectorized - File Output Operator [FS_284] - Group By Operator [GBY_283] (rows=1 width=232) + Reducer 7 vectorized + File Output Operator [FS_287] + Group By Operator [GBY_286] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_282] - Group By Operator [GBY_281] (rows=1 width=232) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_285] + Group By Operator [GBY_284] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_280] (rows=5022875 width=228) + Group By Operator [GBY_283] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_111] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_114] PartitionCols:_col0 - Group By Operator [GBY_110] (rows=5022875 width=228) - Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_237] (rows=5022875 width=227) - Conds:RS_61._col3=RS_279._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_61] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_236] (rows=5022875 width=227) - Conds:RS_58._col3=RS_265._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_232] (rows=5022875 width=227) - Conds:RS_55._col2=RS_254._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + Group By Operator [GBY_113] (rows=5022875 width=228) + Output:["_col0","_col2","_col3"],aggregations:["sum(_col7)","sum(_col8)"],keys:_col6 + Merge Join Operator [MERGEJOIN_240] (rows=5022875 width=227) + Conds:RS_282._col0=RS_65._col5(Inner),Output:["_col6","_col7","_col8"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_239] (rows=5022875 width=227) + Conds:RS_268._col0=RS_61._col4(Inner),Output:["_col5","_col6","_col7"] + <-Reducer 18 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_61] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_238] (rows=5022875 width=227) + Conds:RS_56._col3=RS_257._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_257] PartitionCols:_col0 - Select Operator [SEL_253] (rows=12 width=4) + Select Operator [SEL_256] (rows=12 width=4) Output:["_col0"] - Filter Operator [FIL_252] (rows=12 width=92) + Filter Operator [FIL_255] (rows=12 width=92) predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=92) + TableScan [TS_50] (rows=84 width=92) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_231] (rows=15673790 width=231) - Conds:RS_52._col1=RS_240._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_240] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_237] (rows=15673790 width=231) + Conds:RS_243._col0=RS_54._col1(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_243] PartitionCols:_col0 - Select Operator [SEL_239] (rows=784314 width=4) + Select Operator [SEL_242] (rows=784314 width=4) Output:["_col0"] - Filter Operator [FIL_238] (rows=784314 width=90) + Filter Operator [FIL_241] (rows=784314 width=90) predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=90) + TableScan [TS_37] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_52] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_54] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_230] (rows=15987241 width=235) - Conds:RS_248._col0=RS_251._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + Merge Join Operator [MERGEJOIN_236] (rows=15987241 width=235) + Conds:RS_251._col0=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_247] (rows=143895019 width=239) + Select Operator [SEL_250] (rows=143895019 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_246] (rows=143895019 width=239) + Filter Operator [FIL_249] (rows=143895019 width=239) predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_order_number is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) - TableScan [TS_0] (rows=144002668 width=239) + TableScan [TS_40] (rows=144002668 width=239) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_244] (rows=1 width=12) + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_248] + Group By Operator [GBY_247] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] - Group By Operator [GBY_242] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_246] + Group By Operator [GBY_245] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_241] (rows=784314 width=4) + Select Operator [SEL_244] (rows=784314 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_239] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + Please refer to the previous Select Operator [SEL_242] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_254] PartitionCols:_col0 - Select Operator [SEL_250] (rows=8116 width=98) + Select Operator [SEL_253] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_249] (rows=8116 width=98) + Filter Operator [FIL_252] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_43] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_268] PartitionCols:_col0 - Group By Operator [GBY_264] (rows=14686712 width=4) + Group By Operator [GBY_267] (rows=14686712 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_24] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_23] (rows=144002668 width=4) + Group By Operator [GBY_33] (rows=144002668 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_22] (rows=1411940834 width=11) + Select Operator [SEL_32] (rows=1411940834 width=11) Output:["_col1"] - Filter Operator [FIL_21] (rows=1411940834 width=11) + Filter Operator [FIL_31] (rows=1411940834 width=11) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_233] (rows=1411940834 width=11) - Conds:RS_260._col1=RS_263._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] + Merge Join Operator [MERGEJOIN_235] (rows=1411940834 width=11) + Conds:RS_263._col1=RS_266._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_263] PartitionCols:_col1 - Select Operator [SEL_259] (rows=144002668 width=7) + Select Operator [SEL_262] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_258] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_58_ws1_ws_order_number_min) AND DynamicValue(RS_58_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_58_ws1_ws_order_number_bloom_filter))) - TableScan [TS_12] (rows=144002668 width=7) + Filter Operator [FIL_261] (rows=144002668 width=7) + predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) + TableScan [TS_22] (rows=144002668 width=7) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_259] + Group By Operator [GBY_258] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_189] - Group By Operator [GBY_188] (rows=1 width=12) + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_187] (rows=5022875 width=8) + Select Operator [SEL_134] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_232] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_263] + Please refer to the previous Merge Join Operator [MERGEJOIN_238] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_266] PartitionCols:_col1 - Select Operator [SEL_262] (rows=144002668 width=7) + Select Operator [SEL_265] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_261] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_58_ws1_ws_order_number_min) AND DynamicValue(RS_58_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_58_ws1_ws_order_number_bloom_filter))) - TableScan [TS_15] (rows=144002668 width=7) + Filter Operator [FIL_264] (rows=144002668 width=7) + predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) + TableScan [TS_25] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_257] - Please refer to the previous Group By Operator [GBY_255] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_260] + Please refer to the previous Group By Operator [GBY_258] + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] PartitionCols:_col0 - Group By Operator [GBY_278] (rows=8007986 width=4) + Group By Operator [GBY_281] (rows=8007986 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_46] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_45] (rows=14398467 width=4) + Group By Operator [GBY_18] (rows=14398467 width=4) Output:["_col0"],keys:_col14 - Merge Join Operator [MERGEJOIN_235] (rows=1384229738 width=4) - Conds:RS_41._col0=RS_277._col13(Inner),Output:["_col14"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_277] + Merge Join Operator [MERGEJOIN_234] (rows=1384229738 width=4) + Conds:RS_14._col0=RS_280._col13(Inner),Output:["_col14"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_280] PartitionCols:_col13 - Select Operator [SEL_276] (rows=14398467 width=272) + Select Operator [SEL_279] (rows=14398467 width=272) Output:["_col13"] - Filter Operator [FIL_275] (rows=14398467 width=4) + Filter Operator [FIL_278] (rows=14398467 width=4) predicate:wr_order_number is not null - TableScan [TS_38] (rows=14398467 width=4) + TableScan [TS_11] (rows=14398467 width=4) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_41] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col0 - Select Operator [SEL_37] (rows=1411940834 width=4) + Select Operator [SEL_10] (rows=1411940834 width=4) Output:["_col0"] - Filter Operator [FIL_36] (rows=1411940834 width=11) + Filter Operator [FIL_9] (rows=1411940834 width=11) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_234] (rows=1411940834 width=11) - Conds:RS_271._col1=RS_274._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_271] + Merge Join Operator [MERGEJOIN_233] (rows=1411940834 width=11) + Conds:RS_274._col1=RS_277._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_274] PartitionCols:_col1 - Select Operator [SEL_270] (rows=144002668 width=7) + Select Operator [SEL_273] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_269] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) - TableScan [TS_27] (rows=144002668 width=7) + Filter Operator [FIL_272] (rows=144002668 width=7) + predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_65_ws1_ws_order_number_min) AND DynamicValue(RS_65_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_65_ws1_ws_order_number_bloom_filter))) + TableScan [TS_0] (rows=144002668 width=7) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_267] - Group By Operator [GBY_266] (rows=1 width=12) + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_270] + Group By Operator [GBY_269] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_208] - Group By Operator [GBY_207] (rows=1 width=12) + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_131] + Group By Operator [GBY_130] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_206] (rows=5022875 width=8) + Select Operator [SEL_129] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_236] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_274] + Please refer to the previous Merge Join Operator [MERGEJOIN_239] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_277] PartitionCols:_col1 - Select Operator [SEL_273] (rows=144002668 width=7) + Select Operator [SEL_276] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_272] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) - TableScan [TS_30] (rows=144002668 width=7) + Filter Operator [FIL_275] (rows=144002668 width=7) + predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_65_ws1_ws_order_number_min) AND DynamicValue(RS_65_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_65_ws1_ws_order_number_bloom_filter))) + TableScan [TS_3] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_268] - Please refer to the previous Group By Operator [GBY_266] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_271] + Please refer to the previous Group By Operator [GBY_269] diff --git a/ql/src/test/results/clientpositive/perf/tez/query98.q.out b/ql/src/test/results/clientpositive/perf/tez/query98.q.out index 8e71f1c215..8b1135582c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query98.q.out @@ -71,83 +71,83 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 6 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized - File Output Operator [FS_79] - Select Operator [SEL_78] (rows=138600 width=701) + Reducer 5 vectorized + File Output Operator [FS_80] + Select Operator [SEL_79] (rows=138600 width=701) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_77] - Select Operator [SEL_76] (rows=138600 width=801) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_78] + Select Operator [SEL_77] (rows=138600 width=801) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - PTF Operator [PTF_75] (rows=138600 width=689) + PTF Operator [PTF_76] (rows=138600 width=689) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col1"}] - Select Operator [SEL_74] (rows=138600 width=689) + Select Operator [SEL_75] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_73] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] PartitionCols:_col1 - Group By Operator [GBY_72] (rows=138600 width=689) + Group By Operator [GBY_73] (rows=138600 width=689) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_16] (rows=138600 width=689) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)"],keys:_col9, _col8, _col5, _col6, _col7 - Merge Join Operator [MERGEJOIN_57] (rows=18334631 width=577) - Conds:RS_12._col1=RS_71._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] + Group By Operator [GBY_17] (rows=138600 width=689) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col8)"],keys:_col5, _col4, _col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_58] (rows=18334631 width=577) + Conds:RS_61._col0=RS_14._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_70] (rows=138600 width=581) + Select Operator [SEL_60] (rows=138600 width=581) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_69] (rows=138600 width=581) + Filter Operator [FIL_59] (rows=138600 width=581) predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=581) + TableScan [TS_0] (rows=462000 width=581) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_class","i_category"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_56] (rows=61115434 width=70) - Conds:RS_68._col0=RS_60._col0(Inner),Output:["_col1","_col2"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_60] + Merge Join Operator [MERGEJOIN_57] (rows=61115434 width=70) + Conds:RS_72._col0=RS_64._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_64] PartitionCols:_col0 - Select Operator [SEL_59] (rows=8116 width=4) + Select Operator [SEL_63] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_58] (rows=8116 width=98) + Filter Operator [FIL_62] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=98) + TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_68] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_67] (rows=550076554 width=114) + Select Operator [SEL_71] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_66] (rows=550076554 width=114) + Filter Operator [FIL_70] (rows=550076554 width=114) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=114) + TableScan [TS_3] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ext_sales_price"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_65] - Group By Operator [GBY_64] (rows=1 width=12) + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_69] + Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_63] - Group By Operator [GBY_62] (rows=1 width=12) + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_67] + Group By Operator [GBY_66] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_61] (rows=8116 width=4) + Select Operator [SEL_65] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_59] + Please refer to the previous Select Operator [SEL_63] diff --git a/ql/src/test/results/clientpositive/ppd_join2.q.out b/ql/src/test/results/clientpositive/ppd_join2.q.out index c09fefe225..860cf6cce4 100644 --- a/ql/src/test/results/clientpositive/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/ppd_join2.q.out @@ -27,12 +27,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -83,24 +83,20 @@ STAGE PLANS: Filter Operator predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src filterExpr: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) @@ -118,17 +114,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col2, _col3 Statistics: Num rows: 426 Data size: 75828 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 426 Data size: 75828 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1718,12 +1722,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1774,24 +1778,20 @@ STAGE PLANS: Filter Operator predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: src filterExpr: ((key <> '306') and (sqrt(key) <> 13.0D) and value is not null) (type: boolean) @@ -1809,17 +1809,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 262 Data size: 93272 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 0 _col0 (type: string) + 1 _col3 (type: string) + outputColumnNames: _col2, _col3 Statistics: Num rows: 426 Data size: 75828 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 426 Data size: 75828 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_join3.q.out b/ql/src/test/results/clientpositive/ppd_join3.q.out index 71bf59fbc1..bb5f84ca70 100644 --- a/ql/src/test/results/clientpositive/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/ppd_join3.q.out @@ -27,12 +27,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -85,16 +85,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) @@ -113,20 +106,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 135 Data size: 35775 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean) + predicate: ((_col0 > '10') or (_col3 <> '10')) (type: boolean) Statistics: Num rows: 135 Data size: 35775 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 135 Data size: 24030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1773,12 +1773,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1831,16 +1831,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) @@ -1859,20 +1852,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 86 Data size: 7482 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 135 Data size: 35775 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean) + predicate: ((_col0 > '10') or (_col3 <> '10')) (type: boolean) Statistics: Num rows: 135 Data size: 35775 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 135 Data size: 24030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out index e6beceab6d..55039aa6a0 100644 --- a/ql/src/test/results/clientpositive/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_outer_join4.q.out @@ -27,12 +27,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -86,17 +86,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 86 Data size: 22790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: b filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) @@ -115,6 +107,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 86 Data size: 22790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -125,7 +125,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 135 Data size: 59805 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 135 Data size: 59805 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -427,12 +427,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -486,17 +486,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 86 Data size: 22790 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string) TableScan alias: b filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) @@ -515,6 +507,14 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 86 Data size: 22790 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -525,7 +525,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 135 Data size: 59805 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 135 Data size: 59805 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index 9547e4fa7c..96c67f3bf9 100644 --- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -35,32 +35,32 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-18 is a root stage - Stage-13 depends on stages: Stage-18 - Stage-12 depends on stages: Stage-13 , consists of Stage-16, Stage-17, Stage-1 - Stage-16 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-16 - Stage-9 depends on stages: Stage-1, Stage-10, Stage-11 , consists of Stage-14, Stage-15, Stage-2 - Stage-14 has a backup stage: Stage-2 + Stage-17 is a root stage + Stage-12 depends on stages: Stage-17 + Stage-11 depends on stages: Stage-12 , consists of Stage-15, Stage-16, Stage-3 + Stage-15 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-15 + Stage-8 depends on stages: Stage-3, Stage-9, Stage-10 , consists of Stage-13, Stage-14, Stage-1 + Stage-13 has a backup stage: Stage-1 + Stage-6 depends on stages: Stage-13 + Stage-2 depends on stages: Stage-1, Stage-6, Stage-7 + Stage-14 has a backup stage: Stage-1 Stage-7 depends on stages: Stage-14 - Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-15 has a backup stage: Stage-2 - Stage-8 depends on stages: Stage-15 - Stage-2 - Stage-17 has a backup stage: Stage-1 - Stage-11 depends on stages: Stage-17 Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-16 has a backup stage: Stage-3 + Stage-10 depends on stages: Stage-16 + Stage-3 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-18 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_1:$hdt$_2:$hdt$_3:t1_n94 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_1:$hdt$_2:$hdt$_3:t1_n94 TableScan alias: t1_n94 filterExpr: key is not null (type: boolean) @@ -77,7 +77,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-13 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -109,24 +109,24 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-11 Conditional Operator - Stage: Stage-16 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_1:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME + $hdt$_1:$INTNAME TableScan HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-10 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -158,17 +158,26 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-8 Conditional Operator - Stage: Stage-14 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:src2 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:src2 + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -180,39 +189,30 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -236,23 +236,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_0:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-8 - Map Reduce - Map Operator Tree: + $hdt$_0:src2 TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -264,39 +255,41 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -314,6 +307,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -335,14 +335,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-17 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:src1 + $hdt$_1:$hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:src1 + $hdt$_1:$hdt$_1:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -359,7 +359,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-11 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -381,7 +381,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index f023cef254..52b2f98e67 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -803,7 +803,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n8 POSTHOOK: Input: default@test2_n5 #### A masked pattern was here #### -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test2_n5 @@ -832,6 +832,18 @@ STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) TableScan alias: test2_n5 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE @@ -850,18 +862,6 @@ STAGE PLANS: null sort order: sort order: value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - TableScan - alias: a - Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: int), col_1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -870,9 +870,9 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - residual filter predicates: {((_col6 + _col0) >= 100)} + residual filter predicates: {((_col0 + _col3) >= 100)} Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + expressions: _col3 (type: int), _col4 (type: int), _col5 (type: string), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Limit Number of rows: 10 @@ -889,7 +889,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM test2_n5 JOIN test1_n8 a ON (a.key+test2_n5.key >= 100) @@ -909,15 +909,15 @@ POSTHOOK: Input: default@test1_n8 POSTHOOK: Input: default@test2_n5 #### A masked pattern was here #### 104 3 Fli 100 1 Bob NULL NULL NULL -104 3 Fli 99 0 Alice NULL NULL NULL 102 2 Del 100 1 Bob 99 2 Mat +104 3 Fli 99 0 Alice NULL NULL NULL 102 2 Del 99 0 Alice 99 2 Mat -102 2 Del 100 1 Bob 101 2 Car -102 2 Del 99 0 Alice 101 2 Car -103 2 Ema 100 1 Bob 99 2 Mat -103 2 Ema 99 0 Alice 99 2 Mat -103 2 Ema 100 1 Bob 101 2 Car -103 2 Ema 99 0 Alice 101 2 Car +104 3 Fli 99 2 Mat NULL NULL NULL +102 2 Del 99 2 Mat 99 2 Mat +104 3 Fli 101 2 Car NULL NULL NULL +102 2 Del 101 2 Car 99 2 Mat +104 3 Fli 98 NULL None NULL NULL NULL +102 2 Del 98 NULL None 99 2 Mat Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * diff --git a/ql/src/test/results/clientpositive/spark/auto_join20.q.out b/ql/src/test/results/clientpositive/spark/auto_join20.q.out index a6e53a5164..d18846547a 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -19,19 +19,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -42,21 +43,21 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -66,15 +67,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -82,10 +91,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src3 @@ -96,25 +105,21 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + expressions: hash(_col3,_col4,_col5,_col6,_col0,_col1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) minReductionHashAggr: 0.99 @@ -129,7 +134,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -191,19 +196,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -214,21 +220,21 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: (UDFToDouble(key) < 10.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -238,15 +244,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -254,10 +268,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src3 @@ -268,25 +282,21 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5) (type: int) + expressions: hash(_col3,_col4,_col5,_col6,_col0,_col1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) minReductionHashAggr: 0.99 @@ -301,7 +311,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join21.q.out b/ql/src/test/results/clientpositive/spark/auto_join21.q.out index 4f6e9ddd66..419f420ba0 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join21.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -9,19 +9,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -31,21 +32,21 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -54,15 +55,23 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -70,10 +79,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src3 @@ -84,39 +93,39 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/auto_join28.q.out b/ql/src/test/results/clientpositive/spark/auto_join28.q.out index 20734e6ba2..eb484bc4ac 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join28.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -9,19 +9,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -31,21 +32,21 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -54,15 +55,23 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -70,10 +79,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src3 @@ -84,39 +93,39 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/auto_join29.q.out b/ql/src/test/results/clientpositive/spark/auto_join29.q.out index afa4f03d11..45aba3124d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join29.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -9,19 +9,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -31,21 +32,21 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -54,15 +55,23 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 {_col2} + 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -70,10 +79,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src3 @@ -84,39 +93,39 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2736,19 +2745,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -2757,22 +2767,25 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 - 2 {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -2781,18 +2794,23 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + 1 _col2 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -2800,10 +2818,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src3 @@ -2814,39 +2832,39 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {_col2} 1 - 2 {_col2} keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index b8c467fc01..ccee342e04 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -111,35 +111,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: orderpayment - filterExpr: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + alias: user + filterExpr: userid is not null (type: boolean) + Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + predicate: userid is not null (type: boolean) + Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dealid (type: int), date (type: string), cityid (type: int), userid (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + expressions: userid (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -147,7 +146,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: orderpayment_small + base file name: user_small input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -155,18 +154,18 @@ STAGE PLANS: bucket_count -1 bucketing_version 2 column.name.delimiter , - columns dealid,date,time,cityid,userid + columns userid columns.comments - columns.types int:string:string:int:int + columns.types int #### A masked pattern was here #### - name default.orderpayment_small + name default.user_small numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + numRows 100 + rawDataSize 288 + serialization.ddl struct user_small { i32 userid} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 + totalSize 388 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -177,46 +176,47 @@ STAGE PLANS: bucket_count -1 bucketing_version 2 column.name.delimiter , - columns dealid,date,time,cityid,userid + columns userid columns.comments - columns.types int:string:string:int:int + columns.types int #### A masked pattern was here #### - name default.orderpayment_small + name default.user_small numFiles 1 - numRows 1 - rawDataSize 36 - serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} + numRows 100 + rawDataSize 288 + serialization.ddl struct user_small { i32 userid} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 37 + totalSize 388 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orderpayment_small - name: default.orderpayment_small + name: default.user_small + name: default.user_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_1:orderpayment] - Map 6 + /user_small [$hdt$_0:user] + Map 3 Map Operator Tree: TableScan - alias: dim_pay_date - filterExpr: date is not null (type: boolean) + alias: orderpayment + filterExpr: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: date is not null (type: boolean) + predicate: (date is not null and dealid is not null and cityid is not null and userid is not null) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: date (type: string) - outputColumnNames: _col0 + expressions: dealid (type: int), date (type: string), cityid (type: int), userid (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -271,27 +271,27 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_2:dim_pay_date] + /orderpayment_small [$hdt$_1:$hdt$_1:orderpayment] Map 7 Map Operator Tree: TableScan - alias: deal - filterExpr: dealid is not null (type: boolean) + alias: dim_pay_date + filterExpr: date is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: dealid is not null (type: boolean) + predicate: date is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: dealid (type: int) + expressions: date (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -348,20 +348,20 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_3:deal] + /orderpayment_small [$hdt$_1:$hdt$_2:dim_pay_date] Map 8 Map Operator Tree: TableScan - alias: order_city - filterExpr: cityid is not null (type: boolean) + alias: deal + filterExpr: dealid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: cityid is not null (type: boolean) + predicate: dealid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cityid (type: int) + expressions: dealid (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -425,28 +425,28 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [$hdt$_4:order_city] + /orderpayment_small [$hdt$_1:$hdt$_3:deal] Map 9 Map Operator Tree: TableScan - alias: user - filterExpr: userid is not null (type: boolean) - Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE + alias: order_city + filterExpr: cityid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: userid is not null (type: boolean) - Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE + predicate: cityid is not null (type: boolean) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: userid (type: int) + expressions: cityid (type: int) outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false Execution mode: vectorized @@ -455,7 +455,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: user_small + base file name: orderpayment_small input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -463,18 +463,18 @@ STAGE PLANS: bucket_count -1 bucketing_version 2 column.name.delimiter , - columns userid + columns dealid,date,time,cityid,userid columns.comments - columns.types int + columns.types int:string:string:int:int #### A masked pattern was here #### - name default.user_small + name default.orderpayment_small numFiles 1 - numRows 100 - rawDataSize 288 - serialization.ddl struct user_small { i32 userid} + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 388 + totalSize 37 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -485,25 +485,65 @@ STAGE PLANS: bucket_count -1 bucketing_version 2 column.name.delimiter , - columns userid + columns dealid,date,time,cityid,userid columns.comments - columns.types int + columns.types int:string:string:int:int #### A masked pattern was here #### - name default.user_small + name default.orderpayment_small numFiles 1 - numRows 100 - rawDataSize 288 - serialization.ddl struct user_small { i32 userid} + numRows 1 + rawDataSize 36 + serialization.ddl struct orderpayment_small { i32 dealid, string date, string time, i32 cityid, i32 userid} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 388 + totalSize 37 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.user_small - name: default.user_small + name: default.orderpayment_small + name: default.orderpayment_small Truncated Path -> Alias: - /user_small [$hdt$_0:user] + /orderpayment_small [$hdt$_1:$hdt$_4:order_city] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col5, _col6 + Statistics: Num rows: 110 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 110 Data size: 316 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: true Reduce Operator Tree: Join Operator @@ -523,7 +563,7 @@ STAGE PLANS: tag: 0 value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: string) auto parallelism: false - Reducer 3 + Reducer 5 Needs Tagging: true Reduce Operator Tree: Join Operator @@ -543,7 +583,7 @@ STAGE PLANS: tag: 0 value expressions: _col3 (type: int), _col4 (type: string), _col5 (type: int) auto parallelism: false - Reducer 4 + Reducer 6 Needs Tagging: true Reduce Operator Tree: Join Operator @@ -560,49 +600,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE - tag: 0 + tag: 1 value expressions: _col4 (type: string), _col5 (type: int) auto parallelism: false - Reducer 5 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col5 - Statistics: Num rows: 110 Data size: 316 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 110 Data size: 316 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index 42e3e9db3e..7269d98777 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -355,12 +355,31 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -379,7 +398,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 6 Map Operator Tree: TableScan alias: a @@ -399,25 +418,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -425,28 +425,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 1 _col2 (type: string) + outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -455,7 +438,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -471,6 +454,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join20.q.out b/ql/src/test/results/clientpositive/spark/join20.q.out index 15d531a1d4..8d5faf93ea 100644 --- a/ql/src/test/results/clientpositive/spark/join20.q.out +++ b/ql/src/test/results/clientpositive/spark/join20.q.out @@ -18,11 +18,29 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: src1 @@ -43,7 +61,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: src2 @@ -64,58 +82,59 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {VALUE._col1} 1 - 2 {VALUE._col1} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator @@ -701,11 +720,29 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: src1 @@ -726,7 +763,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: src2 @@ -747,58 +784,59 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {VALUE._col1} 1 - 2 {VALUE._col1} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join21.q.out b/ql/src/test/results/clientpositive/spark/join21.q.out index 2f517b3020..d11712d39f 100644 --- a/ql/src/test/results/clientpositive/spark/join21.q.out +++ b/ql/src/test/results/clientpositive/spark/join21.q.out @@ -16,11 +16,29 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: src1 @@ -40,7 +58,7 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: src2 @@ -60,58 +78,59 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 10.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 1 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {VALUE._col1} 1 - 2 {VALUE._col1} keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join26.q.out b/ql/src/test/results/clientpositive/spark/join26.q.out index d60593dc04..2213e640d4 100644 --- a/ql/src/test/results/clientpositive/spark/join26.q.out +++ b/ql/src/test/results/clientpositive/spark/join26.q.out @@ -49,27 +49,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Position of Big Table: 0 + 1 _col2 (type: string) + Position of Big Table: 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -78,33 +77,36 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -112,40 +114,37 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [$hdt$_2:x] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Position of Big Table: 0 Execution mode: vectorized Local Work: @@ -155,36 +154,33 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -192,24 +188,26 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src1 [$hdt$_1:$hdt$_2:x] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -227,54 +225,62 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col1, _col2 input vertices: - 1 Map 2 - 2 Map 3 + 1 Map 3 Position of Big Table: 0 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col1, _col3, _col4 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string #### A masked pattern was here #### - name default.dest_j1_n10 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1_n10 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + name default.dest_j1_n10 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1_n10 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n10 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n10 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Execution mode: vectorized Local Work: Map Reduce Local Work @@ -330,7 +336,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_1:y] + /src [$hdt$_1:$hdt$_1:y] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/join28.q.out b/ql/src/test/results/clientpositive/spark/join28.q.out index 19335dd7ce..ada72259c2 100644 --- a/ql/src/test/results/clientpositive/spark/join28.q.out +++ b/ql/src/test/results/clientpositive/spark/join28.q.out @@ -43,45 +43,43 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -90,7 +88,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -106,28 +104,35 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1 input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n11 + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n11 Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index 8f49e73cbe..dd9507c65e 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -49,26 +49,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 0 + Position of Big Table: 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -77,7 +77,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -89,14 +89,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -111,39 +111,39 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [$hdt$_2:x] + /src [$hdt$_0:y] Map 3 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Execution mode: vectorized Local Work: @@ -153,7 +153,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -165,14 +165,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -187,26 +187,26 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [$hdt$_0:y] + /src1 [$hdt$_1:$hdt$_2:x] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: z @@ -229,22 +229,22 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col4 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 3 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -336,7 +336,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index b11cba3ce3..0791b17efa 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -57,7 +57,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -132,78 +132,61 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_2:x] + /src1 [$hdt$_1:$hdt$_2:x] Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: z - filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Execution mode: vectorized - Local Work: - Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -213,6 +196,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -220,65 +204,84 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] - Map 4 + /src [$hdt$_0:y] + Map 3 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false Execution mode: vectorized + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -288,7 +291,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -296,20 +298,18 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_0:y] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -317,12 +317,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col4 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -553,7 +553,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -628,51 +628,39 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_2:x] + /src1 [$hdt$_1:$hdt$_2:x] Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: w - filterExpr: value is not null (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Execution mode: vectorized - Local Work: - Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -725,32 +713,44 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_1:w] - Map 4 + /src [$hdt$_0:y] + Map 3 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: w + filterExpr: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string) - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Execution mode: vectorized + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -803,7 +803,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [$hdt$_0:y] + /src [$hdt$_1:$hdt$_1:w] Map 5 Map Operator Tree: TableScan @@ -881,22 +881,22 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_3:z] + /src1 [$hdt$_2:z] Reducer 2 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 + Inner Join 1 to 2 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) 2 _col0 (type: string) - outputColumnNames: _col1, _col4, _col6 + outputColumnNames: _col1, _col3, _col6 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col6 (type: string), _col4 (type: string) + expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1124,7 +1124,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -1199,74 +1199,64 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_2:x] + /src1 [$hdt$_1:$hdt$_2:x] Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) + alias: z + filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string) - auto parallelism: false - Execution mode: vectorized - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + Execution mode: vectorized Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1276,7 +1266,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1284,70 +1273,78 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [$hdt$_1:y] - Map 4 + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: false + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Execution mode: vectorized + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1357,6 +1354,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1364,18 +1362,20 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src [$hdt$_1:$hdt$_1:y] Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -1383,12 +1383,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1616,33 +1616,32 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE tag: 0 - value expressions: _col1 (type: string) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -1650,33 +1649,36 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 - bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1684,42 +1686,41 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [$hdt$_1:x] - Map 4 + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + Map 3 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -1727,7 +1728,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1739,14 +1740,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -1761,33 +1762,33 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [$hdt$_2:y] + /src1 [$hdt$_1:$hdt$_1:x] Map 5 Map Operator Tree: TableScan - alias: z - filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + alias: y + filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1804,27 +1805,23 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + bucketing_version 2 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1834,6 +1831,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 bucketing_version 2 column.name.delimiter , @@ -1841,51 +1839,33 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] + /src [$hdt$_1:$hdt$_2:y] Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false - Reducer 3 Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1921,6 +1901,26 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2117,7 +2117,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -2141,10 +2141,30 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: x + filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: y @@ -2165,7 +2185,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) @@ -2177,38 +2197,18 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: x - filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2383,7 +2383,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -2407,10 +2407,30 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: y + filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: y @@ -2431,7 +2451,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string) @@ -2443,38 +2463,18 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: y - filterExpr: (value is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 1a389e7e8d..b451b77bef 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -49,26 +49,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col1 (type: string) - Position of Big Table: 0 + Position of Big Table: 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -77,7 +77,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -89,14 +89,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -111,39 +111,39 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [$hdt$_2:x] + /src [$hdt$_0:y] Map 3 Map Operator Tree: TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + 0 _col0 (type: string) + 1 _col1 (type: string) Position of Big Table: 0 Execution mode: vectorized Local Work: @@ -153,7 +153,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -165,14 +165,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -187,26 +187,26 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [$hdt$_0:y] + /src1 [$hdt$_1:$hdt$_2:x] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: z @@ -229,22 +229,22 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col4 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 3 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col4 (type: string) + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -336,7 +336,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:z] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/join40.q.out b/ql/src/test/results/clientpositive/spark/join40.q.out index 9642c20671..08d3d31474 100644 --- a/ql/src/test/results/clientpositive/spark/join40.q.out +++ b/ql/src/test/results/clientpositive/spark/join40.q.out @@ -1805,11 +1805,29 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: src1 @@ -1830,7 +1848,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: src2 @@ -1851,58 +1869,59 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {VALUE._col1} 1 - 2 {VALUE._col1} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator @@ -2488,11 +2507,29 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: boolean) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: src1 @@ -2513,7 +2550,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: src2 @@ -2534,58 +2571,59 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), (UDFToDouble(key) < 20.0D) (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: boolean) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 - Right Outer Join 0 to 2 + Left Outer Join 0 to 1 filter predicates: - 0 + 0 {VALUE._col1} 1 - 2 {VALUE._col1} keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - null sort order: zzzzzz - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + null sort order: zzzzzz + sort order: ++++++ + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out index be627199a3..76d4b638ee 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out @@ -218,7 +218,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name PREHOOK: type: QUERY @@ -237,11 +237,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p2 @@ -262,7 +277,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p3 @@ -283,22 +298,27 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col9 + _col0) = _col0) (type: boolean) + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -313,30 +333,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -344,7 +340,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name PREHOOK: type: QUERY @@ -363,11 +359,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p3 @@ -388,7 +399,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -409,37 +420,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -450,7 +431,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -460,6 +441,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index a5954c0c59..2dc3b5228b 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -222,7 +222,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name @@ -243,11 +243,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p2 @@ -268,7 +283,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p3 @@ -289,22 +304,27 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col9 + _col0) = _col0) (type: boolean) + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -319,30 +339,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -350,7 +346,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part p2 join part p3 where p2.p_partkey = 1 and p3.p_name = p2.p_name @@ -371,11 +367,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p3 @@ -396,7 +407,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -417,37 +428,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -458,7 +439,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -468,6 +449,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out index 3fafd67158..0934b5b164 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -76,41 +76,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p2 - filterExpr: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -121,25 +122,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p1 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Reducer 2 @@ -147,24 +148,35 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator @@ -194,41 +206,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p2 - filterExpr: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -239,25 +252,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p1 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Reducer 2 @@ -265,24 +278,35 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator @@ -290,7 +314,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part2_n0 p2 join part3_n0 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name PREHOOK: type: QUERY @@ -313,11 +337,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p2 @@ -338,7 +377,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p3 @@ -359,22 +398,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col9 + _col0) = _col0) (type: boolean) + Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -389,30 +433,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) - Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -420,7 +440,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part2_n0 p2 join part3_n0 p3 on p2_partkey = 1 and p3_name = p2_name PREHOOK: type: QUERY @@ -443,11 +463,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p3 @@ -468,7 +503,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -489,37 +524,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -530,7 +535,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -540,6 +545,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out index 530d029a70..f8f3d6adc0 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -76,41 +76,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p2 - filterExpr: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -121,28 +122,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p1 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: p4 @@ -169,25 +170,36 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Inner Join 0 to 2 - Inner Join 2 to 3 keys: 0 _col1 (type: string) 1 _col1 (type: string) 2 _col1 (type: string) - 3 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 10385 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 10385 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 85 Data size: 10385 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator @@ -219,12 +231,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + filterExpr: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: p2 @@ -245,7 +278,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: p3 @@ -266,27 +299,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -309,23 +321,6 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -336,33 +331,46 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col18 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) - Reducer 4 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out index 40af83dda7..8a92410f0e 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out @@ -78,41 +78,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p2 - filterExpr: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -123,25 +124,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p1 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Reducer 2 @@ -149,24 +150,35 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator @@ -198,41 +210,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p2 - filterExpr: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -243,25 +256,25 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p1 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Reducer 2 @@ -269,24 +282,35 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col1 (type: string) 1 _col1 (type: string) - 2 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator @@ -294,7 +318,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part2_n5 p2 join part3_n2 p3 where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name @@ -319,11 +343,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p2 @@ -344,7 +383,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p3 @@ -365,22 +404,27 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col9 + _col0) = _col0) (type: boolean) + Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -395,30 +439,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) - Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 1586 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -426,7 +446,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part p1 join part2_n5 p2 join part3_n2 p3 where p2_partkey = 1 and p3_name = p2_name @@ -451,11 +471,26 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: p3 @@ -476,7 +511,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: p2 @@ -497,37 +532,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -538,7 +543,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col17 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), 1 (type: int), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col18 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: int), _col23 (type: string), _col24 (type: double), _col25 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 26 Data size: 3173 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -548,6 +553,21 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out index fcc10f2cf5..3e9dfab4d4 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -78,41 +78,42 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: p2 - filterExpr: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + alias: p1 + filterExpr: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p2_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan - alias: p3 - filterExpr: p3_name is not null (type: boolean) + alias: p2 + filterExpr: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p3_name is not null (type: boolean) + predicate: p2_name is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -123,28 +124,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: p1 - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + alias: p3 + filterExpr: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + predicate: p3_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + expressions: p3_partkey (type: int), p3_name (type: string), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: p4 @@ -171,25 +172,36 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Inner Join 0 to 2 - Inner Join 2 to 3 keys: 0 _col1 (type: string) 1 _col1 (type: string) 2 _col1 (type: string) - 3 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 10385 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 85 Data size: 10385 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 85 Data size: 10385 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 57 Data size: 6923 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator @@ -223,12 +235,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: p1 + filterExpr: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_partkey is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: p2 @@ -249,7 +282,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan alias: p3 @@ -270,27 +303,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: p1 - filterExpr: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_partkey is not null and p_name is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Map 7 Map Operator Tree: TableScan @@ -313,23 +325,6 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -340,33 +335,46 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col18 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col18 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28 Data size: 3461 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) - Reducer 4 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col18 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string), _col27 (type: int), _col28 (type: string), _col29 (type: string), _col30 (type: string), _col31 (type: string), _col32 (type: int), _col33 (type: string), _col34 (type: double), _col35 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + File Output Operator + compressed: false Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index b73d18672f..efd61c9e54 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -38,26 +38,26 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + 1 _col1 (type: string) + Position of Big Table: 1 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -66,7 +66,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -78,14 +78,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -100,38 +100,38 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [$hdt$_2:src1] + /src [$hdt$_0:src] Map 3 Map Operator Tree: TableScan - alias: src - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Position of Big Table: 0 Execution mode: vectorized @@ -142,7 +142,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -154,14 +154,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -176,26 +176,26 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [$hdt$_0:src] + /src1 [$hdt$_1:$hdt$_2:src1] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -218,42 +218,46 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 3 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Execution mode: vectorized Local Work: Map Reduce Local Work @@ -457,10 +461,10 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:srcpart] - /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-08/hr=12 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-09/hr=11 [$hdt$_1:$hdt$_1:srcpart] + /srcpart/ds=2008-04-09/hr=12 [$hdt$_1:$hdt$_1:srcpart] Stage: Stage-0 Fetch Operator @@ -500,42 +504,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + filterExpr: (value > 'val_450') (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: (value > 'val_450') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: src - filterExpr: (value > 'val_450') (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value > 'val_450') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Execution mode: vectorized Local Work: @@ -545,7 +549,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -566,25 +570,29 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 732 Data size: 7782 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1 input vertices: - 1 Map 3 + 0 Map 1 Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 805 Data size: 8560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index 3f26b52602..5de92e4d94 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -32,45 +32,43 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -79,7 +77,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -95,27 +93,34 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1 input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -289,45 +294,43 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: x - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: z + filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) + 1 _col1 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan - alias: z - filterExpr: (key is not null and (ds = '2008-04-08') and (11.0D = 11.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -336,7 +339,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: y @@ -352,27 +355,34 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1 input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out index 759fd869bd..a098239f85 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -34,39 +34,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -84,21 +83,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + filterExpr: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -107,31 +107,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 1 _col3 (type: string) + outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -141,6 +121,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -1728,39 +1732,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) + filterExpr: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: src - filterExpr: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + filterExpr: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + predicate: ((key < '400') and (key <> '14') and (key <> '305') and (key <> '302') and (key <> '311')) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -1778,21 +1781,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) + filterExpr: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (value is not null and (key <> '306') and (sqrt(key) <> 13.0D)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < '400') and value is not null and (key <> '14') and (key <> '302') and (key <> '305') and (key <> '311') and ((value <> 'val_50') or (key > '1'))) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -1801,31 +1805,11 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 1 _col3 (type: string) + outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1835,6 +1819,30 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 2ccd76a563..32895ea843 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -34,21 +34,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -56,15 +57,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -77,18 +79,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -96,34 +98,47 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean) - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 > '10') or (_col3 <> '10')) (type: boolean) + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1768,21 +1783,22 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) + predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -1790,15 +1806,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 3 Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + filterExpr: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) + predicate: ((key > '0') and (key < '400') and (key <> '1') and (key <> '4') and (key <> '13') and (key <> '11') and (key <> '12')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -1811,18 +1828,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: src - filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + filterExpr: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '12') and (key <> '11') and (key <> '13')) (type: boolean) + predicate: ((key > '0') and (key < '400') and (key <> '4') and (key <> '1') and (key <> '11') and (key <> '12') and (key <> '13') and ((value <> 'val_500') or (key > '1'))) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -1830,34 +1847,47 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean) - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col0 > '10') or (_col3 <> '10')) (type: boolean) + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col3 (type: string) + expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out index 827838e4f4..0048b30d91 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -34,17 +34,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) + alias: b + filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) + predicate: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -61,15 +62,15 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c + alias: a filterExpr: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -77,19 +78,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: b - filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) + alias: c + filterExpr: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) + predicate: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -97,31 +99,45 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Stage: Stage-0 Fetch Operator @@ -421,17 +437,18 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - filterExpr: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) + alias: b + filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) + predicate: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -448,15 +465,15 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c + alias: a filterExpr: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -464,19 +481,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: b - filterExpr: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) + alias: c + filterExpr: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '15') and (key < '20') and (sqrt(key) <> 13.0D)) (type: boolean) + predicate: ((key < '20') and (key > '15') and (sqrt(key) <> 13.0D)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -484,31 +502,45 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col2 (type: string) + expressions: _col2 (type: string), _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 706 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out index 10b725a4b9..4caf464069 100644 --- a/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/spark/runtime_skewjoin_mapjoin_spark.q.out @@ -35,22 +35,24 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-1 depends on stages: Stage-8 - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-7, Stage-2 - Stage-6 - Stage-3 depends on stages: Stage-6 - Stage-2 depends on stages: Stage-3, Stage-4 - Stage-7 - Stage-4 depends on stages: Stage-7 + Stage-10 is a root stage + Stage-1 depends on stages: Stage-10 + Stage-7 depends on stages: Stage-1 , consists of Stage-9, Stage-5 + Stage-9 + Stage-6 depends on stages: Stage-9 + Stage-5 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-5 , consists of Stage-8, Stage-2 + Stage-8 + Stage-3 depends on stages: Stage-8 + Stage-2 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-10 Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 7 Map Operator Tree: TableScan alias: t1_n94 @@ -74,10 +76,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src1 @@ -97,7 +99,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -118,7 +120,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 5 + 1 Map 7 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -129,7 +131,71 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 6 + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + handleSkewJoin: true + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-9 + Spark +#### A masked pattern was here #### + Vertices: + Map 13 + Map Operator Tree: + TableScan + Spark HashTable Sink Operator + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + Local Work: + Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 reducesinkkey0 (type: string) + 1 reducesinkkey0 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-5 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 11 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: src2 @@ -149,18 +215,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 11 + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() minReductionHashAggr: 0.99 @@ -174,10 +248,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Conditional Operator - Stage: Stage-6 + Stage: Stage-8 Spark #### A masked pattern was here #### Vertices: @@ -188,17 +262,6 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Local Work: - Map Reduce Local Work - Map 9 - Map Operator Tree: - TableScan - Spark HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) Local Work: Map Reduce Local Work @@ -206,17 +269,15 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) Group By Operator aggregations: count() minReductionHashAggr: 0.99 @@ -236,10 +297,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 7 (GROUP, 1) + Reducer 3 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 7 + Map 8 Map Operator Tree: TableScan Reduce Output Operator @@ -264,62 +325,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Spark -#### A masked pattern was here #### - Vertices: - Map 11 - Map Operator Tree: - TableScan - Spark HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Local Work: - Map Reduce Local Work - Map 13 - Map Operator Tree: - TableScan - Spark HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 12 - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - 2 reducesinkkey0 (type: string) - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out index 23267b8987..045c087c2f 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out @@ -1637,19 +1637,20 @@ POSTHOOK: Input: default@regular_table1 POSTHOOK: Input: default@regular_table2 POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: regular_table1 + alias: regular_table2 filterExpr: col1 is not null (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1663,7 +1664,6 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 @@ -1675,14 +1675,19 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator - Target Columns: [Map 3 -> [part_col:int (part_col)]] + Target Columns: [Map 1 -> [part_col:int (part_col)]] Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: regular_table2 + alias: regular_table1 filterExpr: col1 is not null (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1692,34 +1697,43 @@ STAGE PLANS: expressions: col1 (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - Select Operator - expressions: _col0 (type: int) outputColumnNames: _col0 + input vertices: + 1 Map 4 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Select Operator + expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target Columns: [Map 3 -> [part_col:int (part_col)]] + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 1 -> [part_col:int (part_col)]] + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: partitioned_table1 @@ -1731,15 +1745,12 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 39 Data size: 39 Basic stats: COMPLETE Column stats: NONE + 1 Map 3 + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() minReductionHashAggr: 0.99 @@ -1753,7 +1764,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out index 19120df839..83f56fbea0 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_6.q.out @@ -171,7 +171,29 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 6 + Map Operator Tree: + TableScan + alias: part_table_2 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 3 -> [part_col:int (part_col)]] + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map 7 Map Operator Tree: TableScan alias: regular_table @@ -195,9 +217,17 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator - Target Columns: [Map 1 -> [part_col:int (part_col)], Map 4 -> [part_col:int (part_col)]] + Target Columns: [Map 1 -> [part_col:int (part_col)], Map 3 -> [part_col:int (part_col)]] Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - Map 6 + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Reducer 4 (PARTITION-LEVEL SORT, 4) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 4), Map 5 (PARTITION-LEVEL SORT, 4) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: part_table_2 @@ -206,27 +236,14 @@ STAGE PLANS: expressions: col (type: int), part_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target Columns: [Map 1 -> [part_col:int (part_col)]] - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) -#### A masked pattern was here #### - Vertices: - Map 1 + value expressions: _col0 (type: int) + Map 3 Map Operator Tree: TableScan alias: part_table_1 @@ -242,7 +259,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 3 + Map 5 Map Operator Tree: TableScan alias: regular_table @@ -261,45 +278,44 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: part_table_2 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col (type: int), part_col (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col1 (type: int) - 1 _col0 (type: int) - 2 _col1 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int) + expressions: _col4 (type: int), _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Stage: Stage-0 Fetch Operator @@ -383,7 +399,35 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: part_table_2 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col2 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part_col:int (part_col)]] + Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Map 3 Map Operator Tree: TableScan alias: regular_table @@ -400,7 +444,6 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 @@ -412,45 +455,16 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator - Target Columns: [Map 1 -> [part_col:int (part_col)]] + Target Columns: [Map 2 -> [part_col:int (part_col)]] Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 3 - Map Operator Tree: - TableScan - alias: part_table_2 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: col (type: int), part_col (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - 2 _col1 (type: int) - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - Target Columns: [Map 1 -> [part_col:int (part_col)]] - Statistics: Num rows: 8 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: part_table_1 @@ -462,27 +476,34 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: 0 _col1 (type: int) 1 _col0 (type: int) - 2 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2 input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int) + 1 Map 3 + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input vertices: + 0 Map 1 + Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 14 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out index 3ee10deea4..01a9354b07 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out @@ -324,45 +324,72 @@ POSTHOOK: Input: default@part_table2@part2_col=2 POSTHOOK: Input: default@reg_table POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: pt2 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part2_col (type: int) + expressions: col (type: int), part1_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part2_col:int (part2_col)]] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: Map 2 Map Operator Tree: TableScan - alias: pt1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part1_col (type: int) + expressions: col (type: int), part2_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col3 (type: int) Local Work: Map Reduce Local Work @@ -370,7 +397,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: rt @@ -386,23 +413,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col0 (type: int) + 0 _col0 (type: int) + 1 _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: - 0 Map 1 1 Map 2 - Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int) + expressions: _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -486,50 +510,94 @@ POSTHOOK: Input: default@part_table3@part3_col=3 POSTHOOK: Input: default@reg_table POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan - alias: pt2 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part2_col (type: int) + expressions: col (type: int), part1_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part3_col:int (part3_col)], Map 3 -> [part2_col:int (part2_col)]] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan - alias: pt1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part1_col (type: int) + expressions: col (type: int), part2_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col3 (type: int) + Select Operator + expressions: _col3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part3_col:int (part3_col)]] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 3 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: pt3 @@ -538,12 +606,20 @@ STAGE PLANS: expressions: col (type: int), part3_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col5 (type: int) Local Work: Map Reduce Local Work @@ -551,7 +627,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: rt @@ -567,26 +643,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 1 to 3 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col0 (type: int) + 0 _col0 (type: int) + 1 _col5 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 input vertices: - 0 Map 1 1 Map 2 - 2 Map 3 - Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int) + expressions: _col5 (type: int), _col6 (type: int), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -696,51 +766,94 @@ POSTHOOK: Input: default@part_table4@part4_col=4 POSTHOOK: Input: default@reg_table POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan - alias: pt2 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part2_col (type: int) + expressions: col (type: int), part1_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part4_col:int (part4_col)], Map 3 -> [part3_col:int (part3_col)], Map 4 -> [part2_col:int (part2_col)]] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan - alias: pt1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part1_col (type: int) + expressions: col (type: int), part2_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col3 (type: int) + Select Operator + expressions: _col3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part4_col:int (part4_col)], Map 3 -> [part3_col:int (part3_col)]] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: Map 3 Map Operator Tree: TableScan @@ -750,16 +863,41 @@ STAGE PLANS: expressions: col (type: int), part3_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 4 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col5 (type: int) + Select Operator + expressions: _col5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part4_col:int (part4_col)]] + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 4 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: pt4 @@ -768,13 +906,20 @@ STAGE PLANS: expressions: col (type: int), part4_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + input vertices: + 1 Map 3 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col7 (type: int) Local Work: Map Reduce Local Work @@ -782,7 +927,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 1 Map Operator Tree: TableScan alias: rt @@ -798,29 +943,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 1 to 3 - Inner Join 1 to 4 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col0 (type: int) + 0 _col0 (type: int) + 1 _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 input vertices: - 0 Map 1 1 Map 2 - 2 Map 3 - 3 Map 4 - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + expressions: _col7 (type: int), _col8 (type: int), _col5 (type: int), _col6 (type: int), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -960,54 +1096,96 @@ POSTHOOK: Input: default@part_table5@part5_col=5 POSTHOOK: Input: default@reg_table POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan - alias: pt2 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part2_col (type: int) + expressions: col (type: int), part1_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col1 (type: int) - 5 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part5_col:int (part5_col)], Map 3 -> [part4_col:int (part4_col)], Map 4 -> [part3_col:int (part3_col)], Map 5 -> [part2_col:int (part2_col)]] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 2 + + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan - alias: pt1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: col (type: int), part1_col (type: int) + expressions: col (type: int), part2_col (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col1 (type: int) - 5 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 6 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col3 (type: int) + Select Operator + expressions: _col3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part5_col:int (part5_col)], Map 3 -> [part4_col:int (part4_col)], Map 4 -> [part3_col:int (part3_col)]] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 3 + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: pt3 @@ -1016,17 +1194,41 @@ STAGE PLANS: expressions: col (type: int), part3_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col1 (type: int) - 5 _col0 (type: int) + 1 _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 5 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col5 (type: int) + Select Operator + expressions: _col5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part5_col:int (part5_col)], Map 3 -> [part4_col:int (part4_col)]] + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 4 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 Map Operator Tree: TableScan alias: pt4 @@ -1035,17 +1237,41 @@ STAGE PLANS: expressions: col (type: int), part4_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col1 (type: int) - 5 _col0 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + input vertices: + 1 Map 4 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col7 (type: int) + Select Operator + expressions: _col7 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target Columns: [Map 2 -> [part5_col:int (part5_col)]] + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 5 + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 Map Operator Tree: TableScan alias: pt5 @@ -1054,14 +1280,20 @@ STAGE PLANS: expressions: col (type: int), part5_col (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col1 (type: int) - 5 _col0 (type: int) + 1 _col7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col9 (type: int) Local Work: Map Reduce Local Work @@ -1069,7 +1301,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 6 + Map 1 Map Operator Tree: TableScan alias: rt @@ -1085,32 +1317,20 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 1 to 3 - Inner Join 1 to 4 - Inner Join 1 to 5 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - 2 _col1 (type: int) - 3 _col1 (type: int) - 4 _col1 (type: int) - 5 _col0 (type: int) + 0 _col0 (type: int) + 1 _col9 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 input vertices: - 0 Map 1 1 Map 2 - 2 Map 3 - 3 Map 4 - 4 Map 5 - Statistics: Num rows: 33 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + expressions: _col9 (type: int), _col10 (type: int), _col7 (type: int), _col8 (type: int), _col5 (type: int), _col6 (type: int), _col3 (type: int), _col4 (type: int), _col1 (type: int), _col2 (type: int), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 33 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 3f54eb8a90..da8b6bd1d3 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -400,78 +400,88 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) -Reducer 3 <- Reducer 2 (GROUP) -Reducer 4 <- Reducer 3 (SORT) -Reducer 6 <- Map 5 (GROUP) -Reducer 8 <- Map 7 (GROUP) +Reducer 2 <- Map 1 (GROUP) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) +Reducer 4 <- Reducer 3 (GROUP) +Reducer 5 <- Reducer 4 (SORT) +Reducer 7 <- Map 6 (PARTITION-LEVEL SORT), Reducer 9 (PARTITION-LEVEL SORT) +Reducer 9 <- Map 8 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 - File Output Operator [FS_31] - Select Operator [SEL_29] (rows=1 width=20) + Reducer 5 + File Output Operator [FS_34] + Select Operator [SEL_32] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SORT] - SORT [RS_28] - Select Operator [SEL_27] (rows=1 width=28) + <-Reducer 4 [SORT] + SORT [RS_31] + Select Operator [SEL_30] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_26] (rows=1 width=20) + Group By Operator [GBY_29] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [GROUP] - GROUP [RS_25] + <-Reducer 3 [GROUP] + GROUP [RS_28] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1 - Select Operator [SEL_23] (rows=1 width=24) - Output:["_col1","_col5"] - Filter Operator [FIL_21] (rows=1 width=24) - predicate:(((_col4 + _col7) >= 0) and ((_col4 > 0) or _col2)) - Join Operator [JOIN_20] (rows=6 width=23) - Output:["_col1","_col2","_col4","_col5","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} - <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_32] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=2 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [GROUP] - GROUP [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_33] (rows=5 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + Group By Operator [GBY_27] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col7, _col3 + Select Operator [SEL_26] (rows=1 width=20) + Output:["_col3","_col7"] + Filter Operator [FIL_25] (rows=1 width=20) + predicate:((_col6 + _col1) >= 0) + Join Operator [JOIN_24] (rows=5 width=19) + Output:["_col1","_col3","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col3"} + <-Reducer 2 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_22] PartitionCols:_col0 - Select Operator [SEL_16] (rows=2 width=89) + Select Operator [SEL_6] (rows=2 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=2 width=93) + Group By Operator [GBY_5] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [GROUP] - GROUP [RS_14] + <-Map 1 [GROUP] + GROUP [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=2 width=93) + Group By Operator [GBY_3] (rows=2 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_34] (rows=5 width=93) + Filter Operator [FIL_35] (rows=5 width=93) predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_23] + PartitionCols:_col3 + Select Operator [SEL_21] (rows=5 width=104) + Output:["_col1","_col3","_col4","_col5"] + Filter Operator [FIL_20] (rows=5 width=104) + predicate:((_col4 > 0) or _col2) + Join Operator [JOIN_19] (rows=6 width=104) + Output:["_col1","_col2","_col3","_col4","_col5"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 6 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_36] (rows=18 width=84) + predicate:key is not null + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 9 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=2 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [GROUP] + GROUP [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_37] (rows=5 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc PREHOOK: type: QUERY @@ -492,78 +502,88 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) -Reducer 3 <- Reducer 2 (GROUP) -Reducer 4 <- Reducer 3 (SORT) -Reducer 6 <- Map 5 (GROUP) -Reducer 8 <- Map 7 (GROUP) +Reducer 2 <- Map 1 (GROUP) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) +Reducer 4 <- Reducer 3 (GROUP) +Reducer 5 <- Reducer 4 (SORT) +Reducer 7 <- Map 6 (PARTITION-LEVEL SORT), Reducer 9 (PARTITION-LEVEL SORT) +Reducer 9 <- Map 8 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 - File Output Operator [FS_33] - Select Operator [SEL_31] (rows=1 width=20) + Reducer 5 + File Output Operator [FS_36] + Select Operator [SEL_34] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SORT] - SORT [RS_30] - Select Operator [SEL_29] (rows=1 width=28) + <-Reducer 4 [SORT] + SORT [RS_33] + Select Operator [SEL_32] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_28] (rows=1 width=20) + Group By Operator [GBY_31] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [GROUP] - GROUP [RS_27] + <-Reducer 3 [GROUP] + GROUP [RS_30] PartitionCols:_col0, _col1 - Group By Operator [GBY_26] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 - Select Operator [SEL_25] (rows=1 width=36) - Output:["_col1","_col7"] - Filter Operator [FIL_21] (rows=1 width=36) - predicate:(((_col6 + _col9) >= 0) and ((_col4 + _col7) >= 0L) and (_col3 or (_col7 >= 1L)) and ((_col6 > 0) or _col2)) - Join Operator [JOIN_20] (rows=3 width=32) - Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} - <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=99) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_34] (rows=18 width=84) - predicate:((c_int > 0) and key is not null) - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [GROUP] - GROUP [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + Group By Operator [GBY_29] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col9 + Select Operator [SEL_28] (rows=1 width=20) + Output:["_col3","_col9"] + Filter Operator [FIL_27] (rows=1 width=20) + predicate:((_col8 + _col1) >= 0) + Join Operator [JOIN_26] (rows=1 width=20) + Output:["_col1","_col3","_col8","_col9"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col5"} + <-Reducer 2 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_24] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [GROUP] - GROUP [RS_14] + <-Map 1 [GROUP] + GROUP [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_36] (rows=2 width=93) + Filter Operator [FIL_37] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_25] + PartitionCols:_col5 + Select Operator [SEL_23] (rows=1 width=117) + Output:["_col1","_col5","_col6","_col7"] + Filter Operator [FIL_20] (rows=1 width=117) + predicate:(((_col4 + _col7) >= 0L) and (_col3 or (_col7 >= 1L)) and ((_col6 > 0) or _col2)) + Join Operator [JOIN_19] (rows=3 width=113) + Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 6 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=99) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_38] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 9 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [GROUP] + GROUP [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -584,71 +604,81 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 5 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) -Reducer 3 <- Reducer 2 (GROUP) -Reducer 5 <- Map 4 (GROUP) -Reducer 7 <- Map 6 (GROUP) +Reducer 2 <- Map 1 (GROUP) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT) +Reducer 4 <- Reducer 3 (GROUP) +Reducer 6 <- Map 5 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) +Reducer 8 <- Map 7 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 - File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=20) + Reducer 4 + File Output Operator [FS_31] + Group By Operator [GBY_29] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [GROUP] - GROUP [RS_25] + <-Reducer 3 [GROUP] + GROUP [RS_28] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 - Select Operator [SEL_23] (rows=1 width=24) - Output:["_col1","_col5"] - Filter Operator [FIL_21] (rows=1 width=24) - predicate:(((_col4 + _col7) >= 2) and ((_col4 > 0) or _col2)) - Join Operator [JOIN_20] (rows=3 width=22) - Output:["_col1","_col2","_col4","_col5","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} - <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_29] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 4 [GROUP] - GROUP [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_30] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + Group By Operator [GBY_27] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col7 + Select Operator [SEL_26] (rows=1 width=20) + Output:["_col3","_col7"] + Filter Operator [FIL_25] (rows=1 width=20) + predicate:((_col6 + _col1) >= 2) + Join Operator [JOIN_24] (rows=2 width=20) + Output:["_col1","_col3","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col3"} + <-Reducer 2 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_22] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [GROUP] - GROUP [RS_14] + <-Map 1 [GROUP] + GROUP [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_31] (rows=2 width=93) + Filter Operator [FIL_32] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 6 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_23] + PartitionCols:_col3 + Select Operator [SEL_21] (rows=2 width=105) + Output:["_col1","_col3","_col4","_col5"] + Filter Operator [FIL_20] (rows=2 width=105) + predicate:((_col4 > 0) or _col2) + Join Operator [JOIN_19] (rows=3 width=103) + Output:["_col1","_col2","_col3","_col4","_col5"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 5 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_33] (rows=18 width=84) + predicate:key is not null + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 8 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [GROUP] + GROUP [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_34] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int PREHOOK: type: QUERY @@ -669,76 +699,86 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) -Reducer 3 <- Reducer 2 (GROUP) -Reducer 4 <- Reducer 3 (SORT) -Reducer 6 <- Map 5 (GROUP) -Reducer 8 <- Map 7 (GROUP) +Reducer 2 <- Map 1 (GROUP) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) +Reducer 4 <- Reducer 3 (GROUP) +Reducer 5 <- Reducer 4 (SORT) +Reducer 7 <- Map 6 (PARTITION-LEVEL SORT), Reducer 9 (PARTITION-LEVEL SORT) +Reducer 9 <- Map 8 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 - File Output Operator [FS_32] - Select Operator [SEL_31] (rows=1 width=20) + Reducer 5 + File Output Operator [FS_35] + Select Operator [SEL_34] (rows=1 width=20) Output:["_col0","_col1","_col2"] - <-Reducer 3 [SORT] - SORT [RS_30] - Group By Operator [GBY_28] (rows=1 width=20) + <-Reducer 4 [SORT] + SORT [RS_33] + Group By Operator [GBY_31] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [GROUP] - GROUP [RS_27] + <-Reducer 3 [GROUP] + GROUP [RS_30] PartitionCols:_col0, _col1 - Group By Operator [GBY_26] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 - Select Operator [SEL_25] (rows=1 width=36) - Output:["_col1","_col7"] - Filter Operator [FIL_21] (rows=1 width=36) - predicate:(((_col6 + _col9) >= 0) and ((_col4 + _col7) >= 0L) and (_col3 or (_col7 >= 1L)) and ((_col6 > 0) or _col2)) - Join Operator [JOIN_20] (rows=3 width=32) - Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} - <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=99) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_33] (rows=18 width=84) - predicate:((c_int > 0) and key is not null) - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [GROUP] - GROUP [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_34] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + Group By Operator [GBY_29] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col9 + Select Operator [SEL_28] (rows=1 width=20) + Output:["_col3","_col9"] + Filter Operator [FIL_27] (rows=1 width=20) + predicate:((_col8 + _col1) >= 0) + Join Operator [JOIN_26] (rows=1 width=20) + Output:["_col1","_col3","_col8","_col9"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col5"} + <-Reducer 2 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_24] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [GROUP] - GROUP [RS_14] + <-Map 1 [GROUP] + GROUP [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_35] (rows=2 width=93) + Filter Operator [FIL_36] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_25] + PartitionCols:_col5 + Select Operator [SEL_23] (rows=1 width=117) + Output:["_col1","_col5","_col6","_col7"] + Filter Operator [FIL_20] (rows=1 width=117) + predicate:(((_col4 + _col7) >= 0L) and (_col3 or (_col7 >= 1L)) and ((_col6 > 0) or _col2)) + Join Operator [JOIN_19] (rows=3 width=113) + Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 6 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=99) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_37] (rows=18 width=84) + predicate:((c_int > 0) and key is not null) + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 9 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 8 [GROUP] + GROUP [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_38] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -759,71 +799,81 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 5 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) -Reducer 3 <- Reducer 2 (GROUP) -Reducer 5 <- Map 4 (GROUP) -Reducer 7 <- Map 6 (GROUP) +Reducer 2 <- Map 1 (GROUP) +Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT) +Reducer 4 <- Reducer 3 (GROUP) +Reducer 6 <- Map 5 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) +Reducer 8 <- Map 7 (GROUP) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 - File Output Operator [FS_28] - Group By Operator [GBY_26] (rows=1 width=20) + Reducer 4 + File Output Operator [FS_31] + Group By Operator [GBY_29] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [GROUP] - GROUP [RS_25] + <-Reducer 3 [GROUP] + GROUP [RS_28] PartitionCols:_col0, _col1 - Group By Operator [GBY_24] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 - Select Operator [SEL_23] (rows=1 width=24) - Output:["_col1","_col5"] - Filter Operator [FIL_21] (rows=1 width=24) - predicate:(((_col4 + _col7) >= 0) and ((_col4 > 0) or _col2)) - Join Operator [JOIN_20] (rows=3 width=22) - Output:["_col1","_col2","_col4","_col5","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} - <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_17] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_29] (rows=18 width=84) - predicate:key is not null - TableScan [TS_0] (rows=20 width=84) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 4 [GROUP] - GROUP [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_30] (rows=2 width=93) - predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_19] + Group By Operator [GBY_27] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col3, _col7 + Select Operator [SEL_26] (rows=1 width=20) + Output:["_col3","_col7"] + Filter Operator [FIL_25] (rows=1 width=20) + predicate:((_col6 + _col1) >= 0) + Join Operator [JOIN_24] (rows=2 width=20) + Output:["_col1","_col3","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col3"} + <-Reducer 2 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_22] PartitionCols:_col0 - Select Operator [SEL_16] (rows=1 width=89) + Select Operator [SEL_6] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=93) + Group By Operator [GBY_5] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 6 [GROUP] - GROUP [RS_14] + <-Map 1 [GROUP] + GROUP [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_13] (rows=1 width=93) + Group By Operator [GBY_3] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_31] (rows=2 width=93) + Filter Operator [FIL_32] (rows=2 width=93) predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) - TableScan [TS_10] (rows=20 width=88) + TableScan [TS_0] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 6 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_23] + PartitionCols:_col3 + Select Operator [SEL_21] (rows=2 width=105) + Output:["_col1","_col3","_col4","_col5"] + Filter Operator [FIL_20] (rows=2 width=105) + predicate:((_col4 > 0) or _col2) + Join Operator [JOIN_19] (rows=3 width=103) + Output:["_col1","_col2","_col3","_col4","_col5"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 5 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_17] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=18 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_33] (rows=18 width=84) + predicate:key is not null + TableScan [TS_7] (rows=20 width=84) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 8 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_18] + PartitionCols:_col0 + Select Operator [SEL_16] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_15] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [GROUP] + GROUP [RS_14] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_13] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_34] (rows=2 width=93) + predicate:((c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((c_int + 1) >= 0) and ((UDFToFloat(c_int) + c_float) >= 0.0) and ((c_int > 0) or c_float is not null) and key is not null) + TableScan [TS_10] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select unionsrc.key FROM (select 'tst1' as key, count(1) as value from src) unionsrc PREHOOK: type: QUERY @@ -1246,47 +1296,55 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Map 3 (PARTITION-LEVEL SORT), Map 4 (PARTITION-LEVEL SORT) +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 4 (PARTITION-LEVEL SORT) +Reducer 4 <- Map 3 (PARTITION-LEVEL SORT), Map 5 (PARTITION-LEVEL SORT) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_17] - Select Operator [SEL_16] (rows=24 width=101) + File Output Operator [FS_20] + Select Operator [SEL_19] (rows=24 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_13] (rows=24 width=105) - predicate:(((_col1 > 0) or (_col6 >= 0)) and ((_col1 > 0) or _col7) and ((_col1 + _col4) = 2)) - Join Operator [JOIN_12] (rows=48 width=104) - Output:["_col1","_col2","_col3","_col4","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":0,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} + Filter Operator [FIL_17] (rows=24 width=104) + predicate:(((_col4 > 0) or (_col1 >= 0)) and ((_col4 > 0) or _col2)) + Join Operator [JOIN_16] (rows=24 width=104) + Output:["_col1","_col2","_col4","_col5","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_18] (rows=9 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and key is not null and ((c_int + 1) = 2)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) - Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=9 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) = 2) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_11] + PARTITION-LEVEL SORT [RS_14] PartitionCols:_col0 - Select Operator [SEL_8] (rows=18 width=88) + Select Operator [SEL_2] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_20] (rows=18 width=84) + Filter Operator [FIL_21] (rows=18 width=84) predicate:key is not null - TableScan [TS_6] (rows=20 width=84) + TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 4 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_15] + PartitionCols:_col0 + Filter Operator [FIL_12] (rows=8 width=182) + predicate:((_col1 + _col4) = 2) + Join Operator [JOIN_11] (rows=16 width=182) + Output:["_col0","_col1","_col2","_col3","_col4"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 3 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_9] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=9 width=93) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_22] (rows=9 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and key is not null and ((c_int + 1) = 2)) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 5 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_10] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=9 width=89) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=9 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) = 2) and key is not null) + TableScan [TS_6] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select * from (select q, b, cbo_t2.p, cbo_t1.c, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1 where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0)) cbo_t1 right outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2 where (cbo_t2.c_int + 1 == 2) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0)) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q == 2) and (b > 0 or c_int >= 0)) R where (q + 1 = 2) and (R.b > 0 or c_int >= 0) PREHOOK: type: QUERY @@ -1307,47 +1365,55 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Map 3 (PARTITION-LEVEL SORT), Map 4 (PARTITION-LEVEL SORT) +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 4 (PARTITION-LEVEL SORT) +Reducer 4 <- Map 3 (PARTITION-LEVEL SORT), Map 5 (PARTITION-LEVEL SORT) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 - File Output Operator [FS_17] - Select Operator [SEL_16] (rows=24 width=101) + File Output Operator [FS_20] + Select Operator [SEL_19] (rows=24 width=100) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_13] (rows=24 width=105) - predicate:(((_col1 > 0) or (_col6 >= 0)) and ((_col1 > 0) or _col7) and ((_col1 + _col4) = 2)) - Join Operator [JOIN_12] (rows=48 width=104) - Output:["_col1","_col2","_col3","_col4","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":0,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} + Filter Operator [FIL_17] (rows=24 width=104) + predicate:(((_col4 > 0) or (_col1 >= 0)) and ((_col4 > 0) or _col2)) + Join Operator [JOIN_16] (rows=24 width=104) + Output:["_col1","_col2","_col4","_col5","_col6","_col7"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_9] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_18] (rows=9 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and key is not null and ((c_int + 1) = 2)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 3 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) - Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=9 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) = 2) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Map 4 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_11] + PARTITION-LEVEL SORT [RS_14] PartitionCols:_col0 - Select Operator [SEL_8] (rows=18 width=88) + Select Operator [SEL_2] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_20] (rows=18 width=84) + Filter Operator [FIL_21] (rows=18 width=84) predicate:key is not null - TableScan [TS_6] (rows=20 width=84) + TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 4 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_15] + PartitionCols:_col0 + Filter Operator [FIL_12] (rows=8 width=182) + predicate:((_col1 + _col4) = 2) + Join Operator [JOIN_11] (rows=16 width=182) + Output:["_col0","_col1","_col2","_col3","_col4"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Map 3 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_9] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=9 width=93) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_22] (rows=9 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and key is not null and ((c_int + 1) = 2)) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Map 5 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_10] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=9 width=89) + Output:["_col0","_col1"] + Filter Operator [FIL_23] (rows=9 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) = 2) and key is not null) + TableScan [TS_6] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key order by x limit 1 PREHOOK: type: QUERY @@ -1519,100 +1585,110 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (GROUP) -Reducer 3 <- Reducer 2 (SORT) -Reducer 4 <- Map 10 (PARTITION-LEVEL SORT), Reducer 3 (PARTITION-LEVEL SORT), Reducer 9 (PARTITION-LEVEL SORT) -Reducer 5 <- Reducer 4 (GROUP) -Reducer 6 <- Reducer 5 (SORT) -Reducer 8 <- Map 7 (GROUP) -Reducer 9 <- Reducer 8 (SORT) +Reducer 10 <- Map 9 (GROUP) +Reducer 11 <- Reducer 10 (SORT) +Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 8 (PARTITION-LEVEL SORT) +Reducer 3 <- Reducer 2 (GROUP) +Reducer 4 <- Reducer 3 (SORT) +Reducer 6 <- Map 5 (GROUP) +Reducer 7 <- Reducer 6 (SORT) +Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT), Reducer 7 (PARTITION-LEVEL SORT) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 - File Output Operator [FS_44] - Limit [LIM_42] (rows=1 width=28) + Reducer 4 + File Output Operator [FS_47] + Limit [LIM_45] (rows=1 width=28) Number of rows:5 - Select Operator [SEL_41] (rows=1 width=28) + Select Operator [SEL_44] (rows=1 width=28) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SORT] - SORT [RS_40] - Select Operator [SEL_39] (rows=1 width=28) + <-Reducer 3 [SORT] + SORT [RS_43] + Select Operator [SEL_42] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_38] (rows=1 width=20) + Group By Operator [GBY_41] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [GROUP] - GROUP [RS_37] + <-Reducer 2 [GROUP] + GROUP [RS_40] PartitionCols:_col0, _col1 - Group By Operator [GBY_36] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col7 - Select Operator [SEL_35] (rows=2 width=28) - Output:["_col4","_col7"] - Filter Operator [FIL_33] (rows=2 width=28) - predicate:((_col5 or _col8) and ((_col3 + _col1) >= 0)) - Join Operator [JOIN_32] (rows=6 width=27) - Output:["_col1","_col3","_col4","_col5","_col7","_col8"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"},{"":"{\"type\":\"Inner\",\"left\":1,\"right\":2}"}],keys:{"0":"_col0","1":"_col0","2":"_col0"} - <-Map 10 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_31] + Group By Operator [GBY_39] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col7, _col1 + Select Operator [SEL_38] (rows=2 width=20) + Output:["_col1","_col7"] + Filter Operator [FIL_37] (rows=2 width=20) + predicate:(_col8 or _col2) + Join Operator [JOIN_36] (rows=3 width=18) + Output:["_col1","_col2","_col7","_col8"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col2"} + <-Map 1 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_34] PartitionCols:_col0 - Select Operator [SEL_28] (rows=18 width=88) + Select Operator [SEL_2] (rows=18 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_47] (rows=18 width=84) + Filter Operator [FIL_48] (rows=18 width=84) predicate:key is not null - TableScan [TS_26] (rows=20 width=84) + TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 3 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_29] - PartitionCols:_col0 - Filter Operator [FIL_11] (rows=2 width=105) - predicate:_col0 is not null - Limit [LIM_9] (rows=3 width=105) - Number of rows:5 - Select Operator [SEL_8] (rows=3 width=105) - Output:["_col0","_col1"] - <-Reducer 2 [SORT] - SORT [RS_7] - Select Operator [SEL_6] (rows=3 width=105) + <-Reducer 8 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_35] + PartitionCols:_col2 + Select Operator [SEL_33] (rows=1 width=105) + Output:["_col2","_col4","_col5"] + Filter Operator [FIL_32] (rows=1 width=105) + predicate:((_col3 + _col1) >= 0) + Join Operator [JOIN_31] (rows=2 width=105) + Output:["_col1","_col2","_col3","_col4","_col5"],condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"} + <-Reducer 11 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_30] + PartitionCols:_col0 + Select Operator [SEL_28] (rows=2 width=101) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [GROUP] - GROUP [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_45] (rows=6 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_30] - PartitionCols:_col0 - Select Operator [SEL_25] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_24] (rows=2 width=97) - predicate:_col0 is not null - Limit [LIM_22] (rows=3 width=97) - Number of rows:5 - Select Operator [SEL_21] (rows=3 width=97) - Output:["_col0","_col1","_col2"] - <-Reducer 8 [SORT] - SORT [RS_20] - Select Operator [SEL_19] (rows=3 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_18] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [GROUP] - GROUP [RS_17] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_16] (rows=3 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_46] (rows=6 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) - TableScan [TS_13] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Filter Operator [FIL_27] (rows=2 width=97) + predicate:_col0 is not null + Limit [LIM_25] (rows=3 width=97) + Number of rows:5 + Select Operator [SEL_24] (rows=3 width=97) + Output:["_col0","_col1","_col2"] + <-Reducer 10 [SORT] + SORT [RS_23] + Select Operator [SEL_22] (rows=3 width=97) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_21] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 9 [GROUP] + GROUP [RS_20] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_19] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_50] (rows=6 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) + TableScan [TS_16] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [PARTITION-LEVEL SORT] + PARTITION-LEVEL SORT [RS_29] + PartitionCols:_col0 + Filter Operator [FIL_14] (rows=2 width=105) + predicate:_col0 is not null + Limit [LIM_12] (rows=3 width=105) + Number of rows:5 + Select Operator [SEL_11] (rows=3 width=105) + Output:["_col0","_col1"] + <-Reducer 6 [SORT] + SORT [RS_10] + Select Operator [SEL_9] (rows=3 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_8] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [GROUP] + GROUP [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=3 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_49] (rows=6 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.c_int from cbo_t1 left semi join cbo_t2 on cbo_t1.key=cbo_t2.key where (cbo_t1.c_int + 1 == 2) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) PREHOOK: type: QUERY @@ -4477,39 +4553,41 @@ Stage-0 Fetch Operator limit:-1 Stage-1 - Map 1 - File Output Operator [FS_16] - Map Join Operator [MAPJOIN_20] (rows=805 width=10) - Conds:MAPJOIN_21._col1=MAPJOIN_21._col0(Inner),Output:["_col0"] - <-Map Join Operator [MAPJOIN_21] (rows=732 width=10) - Conds:SEL_2._col0=SEL_2._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_2] (rows=666 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=666 width=10) - predicate:((value > 'val_450') and key is not null) - TableScan [TS_0] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Map 2 + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=805 width=10) + Output:["_col0"] + Map Join Operator [MAPJOIN_21] (rows=805 width=10) + Conds:MAPJOIN_22._col0=MAPJOIN_22._col1(Inner),Output:["_col1"] + <-Map Join Operator [MAPJOIN_22] (rows=732 width=10) + Conds:SEL_5._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=666 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_19] (rows=666 width=10) + predicate:((value > 'val_450') and key is not null) + TableScan [TS_3] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] Map Reduce Local Work Stage-2 - Map 2 - keys: [HASHTABLESINK_23] - 0_col0,1_col0 - Select Operator [SEL_5] (rows=25 width=7) + Map 1 + keys: [HASHTABLESINK_24] + 0_col0,1_col1 + Select Operator [SEL_2] (rows=166 width=10) Output:["_col0"] - Filter Operator [FIL_18] (rows=25 width=7) - predicate:key is not null - TableScan [TS_3] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] + Filter Operator [FIL_18] (rows=166 width=10) + predicate:(value > 'val_450') + TableScan [TS_0] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] Map Reduce Local Work Map 3 - keys: [HASHTABLESINK_26] - 0_col1,1_col0 - Select Operator [SEL_8] (rows=166 width=10) + keys: [HASHTABLESINK_27] + 0_col0,1_col0 + Select Operator [SEL_8] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_19] (rows=166 width=10) - predicate:(value > 'val_450') - TableScan [TS_6] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] + Filter Operator [FIL_20] (rows=25 width=7) + predicate:key is not null + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] Map Reduce Local Work PREHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' @@ -4538,39 +4616,41 @@ Stage-0 Fetch Operator limit:-1 Stage-1 - Map 1 - File Output Operator [FS_16] - Map Join Operator [MAPJOIN_20] (rows=805 width=10) - Conds:MAPJOIN_21._col1=MAPJOIN_21._col0(Inner),Output:["_col0"] - <-Map Join Operator [MAPJOIN_21] (rows=732 width=10) - Conds:SEL_2._col0=SEL_2._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_2] (rows=666 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=666 width=10) - predicate:((value > 'val_450') and key is not null) - TableScan [TS_0] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Map 2 + File Output Operator [FS_17] + Select Operator [SEL_16] (rows=805 width=10) + Output:["_col0"] + Map Join Operator [MAPJOIN_21] (rows=805 width=10) + Conds:MAPJOIN_22._col0=MAPJOIN_22._col1(Inner),Output:["_col1"] + <-Map Join Operator [MAPJOIN_22] (rows=732 width=10) + Conds:SEL_5._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=666 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_19] (rows=666 width=10) + predicate:((value > 'val_450') and key is not null) + TableScan [TS_3] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] Map Reduce Local Work Stage-2 - Map 2 - keys: [HASHTABLESINK_23] - 0_col0,1_col0 - Select Operator [SEL_5] (rows=25 width=7) + Map 1 + keys: [HASHTABLESINK_24] + 0_col0,1_col1 + Select Operator [SEL_2] (rows=166 width=10) Output:["_col0"] - Filter Operator [FIL_18] (rows=25 width=7) - predicate:key is not null - TableScan [TS_3] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] + Filter Operator [FIL_18] (rows=166 width=10) + predicate:(value > 'val_450') + TableScan [TS_0] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] Map Reduce Local Work Map 3 - keys: [HASHTABLESINK_26] - 0_col1,1_col0 - Select Operator [SEL_8] (rows=166 width=10) + keys: [HASHTABLESINK_27] + 0_col0,1_col0 + Select Operator [SEL_8] (rows=25 width=7) Output:["_col0"] - Filter Operator [FIL_19] (rows=166 width=10) - predicate:(value > 'val_450') - TableScan [TS_6] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["value"] + Filter Operator [FIL_20] (rows=25 width=7) + predicate:key is not null + TableScan [TS_6] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key"] Map Reduce Local Work PREHOOK: query: explain diff --git a/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out index d8478ef026..27f262d782 100644 --- a/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_use_op_stats.q.out @@ -160,108 +160,128 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 55), Map 4 (PARTITION-LEVEL SORT, 55), Reducer 6 (PARTITION-LEVEL SORT, 55) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 43), Map 7 (PARTITION-LEVEL SORT, 43) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 43), Map 5 (PARTITION-LEVEL SORT, 43) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 43), Reducer 7 (PARTITION-LEVEL SORT, 43) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 13), Map 8 (PARTITION-LEVEL SORT, 13) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src1 + alias: src2 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 5 + Map 6 Map Operator Tree: TableScan - alias: src2 + alias: src1 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 + Map 8 Map Operator Tree: TableScan - alias: src1 + alias: src2 filterExpr: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1542 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(key) > 150.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 49 Data size: 510 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string) 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 400 Data size: 4265 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hash(_col0,_col3) (type: int) + expressions: hash(_col2,_col1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 400 Data size: 4265 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) minReductionHashAggr: 0.99 @@ -273,7 +293,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -288,7 +308,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -296,19 +316,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 53 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 53 Data size: 561 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 98b62417bb..72a015ff21 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -215,7 +215,8 @@ POSTHOOK: Input: default@part_null_n0 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully -Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -226,7 +227,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@tempty_n0 #### A masked pattern was here #### -Warning: Shuffle Join JOIN[17][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select * from tempty_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -245,11 +247,31 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: tempty_n0 @@ -268,7 +290,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: tempty_n0 @@ -287,26 +309,31 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized - Map 5 - Map Operator Tree: - TableScan - alias: part - filterExpr: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 + Statistics: Num rows: 26 Data size: 3407 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (_col9 > _col11) (type: boolean) + Statistics: Num rows: 8 Data size: 1048 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 8 Data size: 1048 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1048 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -323,32 +350,21 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reducer 3 + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 26 Data size: 3381 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (_col11 > _col1) (type: boolean) - Statistics: Num rows: 8 Data size: 1040 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 1040 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 1040 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 9 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 9 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -4212,12 +4228,34 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: lineitem + filterExpr: (l_partkey is not null and l_quantity is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (l_partkey is not null and l_quantity is not null) (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan alias: part @@ -4237,7 +4275,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: lineitem @@ -4261,48 +4299,25 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan - alias: lineitem - filterExpr: (l_partkey is not null and l_quantity is not null) (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (l_partkey is not null and l_quantity is not null) (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) - Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col2, _col4, _col5 - Statistics: Num rows: 220 Data size: 26397 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 110 Data size: 13198 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col4 > _col2) (type: boolean) - Statistics: Num rows: 73 Data size: 8759 Basic stats: COMPLETE Column stats: NONE + predicate: (_col1 > _col5) (type: boolean) + Statistics: Num rows: 36 Data size: 4319 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col5 (type: double) - outputColumnNames: _col5 - Statistics: Num rows: 73 Data size: 8759 Basic stats: COMPLETE Column stats: NONE + expressions: _col2 (type: double) + outputColumnNames: _col2 + Statistics: Num rows: 36 Data size: 4319 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) + aggregations: sum(_col2) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -4328,6 +4343,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5743,7 +5775,8 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join JOIN[26][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[25][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 5' is a cross product +Warning: Shuffle Join JOIN[29][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5760,13 +5793,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 2) - Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan alias: part @@ -5790,7 +5843,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 4 + Map 6 Map Operator Tree: TableScan alias: part @@ -5812,26 +5865,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: part - filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 338 Data size: 85189 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col5 > _col9) (type: boolean) + Statistics: Num rows: 112 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 112 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 112 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5856,33 +5914,22 @@ STAGE PLANS: sort order: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 3 + Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 1 - 2 - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 338 Data size: 84851 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col7 > _col0) (type: boolean) - Statistics: Num rows: 112 Data size: 28116 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 112 Data size: 28116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 112 Data size: 28116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1690 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 13 Data size: 1690 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5903,7 +5950,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index cbd33210d2..16264a2ffd 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -5028,7 +5028,7 @@ POSTHOOK: Input: default@part 6 28 6 28 7 28 -Warning: Shuffle Join JOIN[47][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[48][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type and (select sum(p_size) from part a1 where a1.p_partkey = p.p_partkey @@ -5053,13 +5053,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 10 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) Reducer 12 <- Map 11 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP, 1) - Reducer 9 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -5077,26 +5077,6 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: pp - filterExpr: (p_size is not null and p_type is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_size is not null and p_type is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Map 11 Map Operator Tree: TableScan @@ -5122,6 +5102,26 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized Map 3 + Map Operator Tree: + TableScan + alias: pp + filterExpr: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 7 Map Operator Tree: TableScan alias: p @@ -5142,7 +5142,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: a1 @@ -5164,6 +5164,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col1) <= 1) (type: boolean) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE Reducer 12 Execution mode: vectorized Reduce Operator Tree: @@ -5208,39 +5235,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0 + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col2 (type: int) + outputColumnNames: _col2 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col0 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reducer 6 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -5260,7 +5274,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5273,33 +5287,23 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 9 - Execution mode: vectorized + Reducer 8 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator @@ -5307,7 +5311,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[47][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[48][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type and (select sum(p_size) from part a1 where a1.p_partkey = p.p_partkey @@ -5350,7 +5354,7 @@ POSTHOOK: Input: default@part 6 28 6 28 7 28 -Warning: Shuffle Join JOIN[71][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[72][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type @@ -5377,16 +5381,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 11 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 17 (PARTITION-LEVEL SORT, 2) - Reducer 12 <- Reducer 11 (GROUP, 1) - Reducer 14 <- Map 13 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 10 <- Reducer 17 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2) + Reducer 15 <- Map 14 (GROUP PARTITION-LEVEL SORT, 2) Reducer 17 <- Map 16 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) Reducer 7 <- Map 6 (GROUP, 2) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 13 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -5406,20 +5410,18 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 13 + Map 12 Map Operator Tree: TableScan - alias: a1 - filterExpr: p_partkey is not null (type: boolean) + alias: p + filterExpr: (p_size is not null and p_partkey is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: (p_size is not null and p_partkey is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_partkey (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: p_partkey (type: int), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -5427,25 +5429,28 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int) Execution mode: vectorized - Map 15 + Map 14 Map Operator Tree: TableScan - alias: pp - filterExpr: (p_size is not null and p_type is not null) (type: boolean) + alias: a1 + filterExpr: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_size is not null and p_type is not null) (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Group By Operator + keys: p_partkey (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 16 @@ -5521,41 +5526,24 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: p - filterExpr: (p_size is not null and p_partkey is not null and p_type is not null) (type: boolean) + alias: pp + filterExpr: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_size is not null and p_partkey is not null and p_type is not null) (type: boolean) + predicate: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) + key expressions: _col0 (type: string), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: int) Execution mode: vectorized Reducer 10 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -5575,7 +5563,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 12 + Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5588,7 +5576,24 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 14 + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -5730,19 +5735,22 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + 0 _col0 (type: string), _col1 (type: int) + 1 _col1 (type: string), _col2 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -5750,7 +5758,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[71][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[72][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Work 'Reducer 3' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type diff --git a/ql/src/test/results/clientpositive/tez/tez-tag.q.out b/ql/src/test/results/clientpositive/tez/tez-tag.q.out index d070ef611c..faf4e2a9ce 100644 --- a/ql/src/test/results/clientpositive/tez/tez-tag.q.out +++ b/ql/src/test/results/clientpositive/tez/tez-tag.q.out @@ -198,55 +198,55 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 - File Output Operator [FS_20] - Group By Operator [GBY_18] (rows=1 width=8) + Reducer 3 + File Output Operator [FS_21] + Group By Operator [GBY_19] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_17] - Group By Operator [GBY_16] (rows=1 width=8) + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_30] (rows=64 width=8) - Conds:RS_12._col0=RS_13._col0(Inner) - <-Map 6 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_31] (rows=64 width=8) + Conds:RS_13._col0=RS_14._col0(Inner) + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=500 width=4) + Select Operator [SEL_2] (rows=500 width=4) Output:["_col0"] - Filter Operator [FIL_28] (rows=500 width=4) + Filter Operator [FIL_27] (rows=500 width=4) predicate:key is not null - TableScan [TS_6] (rows=500 width=4) + TableScan [TS_0] (rows=500 width=4) default@tab_part_n6,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_14] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_29] (rows=39 width=4) + Merge Join Operator [MERGEJOIN_30] (rows=39 width=4) Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] + <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_9] PartitionCols:_col1 - Select Operator [SEL_2] (rows=242 width=95) + Select Operator [SEL_5] (rows=242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=242 width=95) + Filter Operator [FIL_28] (rows=242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242 width=95) + TableScan [TS_3] (rows=242 width=95) default@tab_n5,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 5 [SIMPLE_EDGE] + <-Map 6 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=25 width=89) + Select Operator [SEL_8] (rows=25 width=89) Output:["_col0"] - Filter Operator [FIL_27] (rows=25 width=89) + Filter Operator [FIL_29] (rows=25 width=89) predicate:value is not null - TableScan [TS_3] (rows=25 width=89) + TableScan [TS_6] (rows=25 width=89) default@src1,c,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select count(*) from tab_n5 a join tab_part_n6 b on a.key = b.key join src1 c on a.value = c.value