diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index c55f8db61a..7825774244 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -605,6 +605,7 @@ minillaplocal.query.files=\ join_nullsafe.q,\ join_is_not_distinct_from.q,\ join_reordering_no_stats.q,\ + join_reorder5.q,\ kryo.q,\ leftsemijoin_mr.q,\ limit_join_transpose.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index 492c55e050..ee3242d40c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -29,20 +29,54 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule { public static final HiveJoinProjectTransposeRule LEFT_PROJECT_BTW_JOIN = - new HiveJoinProjectTransposeRule( + new HiveJoinLeftProjectBtwJoinTransposeRule(); + + private static final class HiveJoinLeftProjectBtwJoinTransposeRule extends HiveJoinProjectTransposeRule { + + private HiveJoinLeftProjectBtwJoinTransposeRule() { + super( operand(HiveJoin.class, - operand(HiveProject.class, operand(HiveJoin.class, any())), - operand(RelNode.class, any())), + operand(HiveProject.class, operand(HiveJoin.class, any())), + operand(RelNode.class, any())), "JoinProjectTransposeRule(Project-Join-Other)", false, HiveRelFactories.HIVE_BUILDER); + } + + @Override + protected boolean hasLeftChild(RelOptRuleCall call) { + return true; + } + + @Override + protected boolean hasRightChild(RelOptRuleCall call) { + return false; + } + } public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_BTW_JOIN = - new HiveJoinProjectTransposeRule( + new HiveJoinRightProjectBtwJoinTransposeRule(); + + private static final class HiveJoinRightProjectBtwJoinTransposeRule extends HiveJoinProjectTransposeRule { + + private HiveJoinRightProjectBtwJoinTransposeRule() { + super( operand(HiveJoin.class, - operand(RelNode.class, any()), - operand(HiveProject.class, operand(HiveJoin.class, any()))), + operand(RelNode.class, any()), + operand(HiveProject.class, operand(HiveJoin.class, any()))), "JoinProjectTransposeRule(Other-Project-Join)", false, HiveRelFactories.HIVE_BUILDER); + } + + @Override + protected boolean hasLeftChild(RelOptRuleCall call) { + return false; + } + + @Override + protected boolean hasRightChild(RelOptRuleCall call) { + return true; + } + } public static final HiveJoinProjectTransposeRule BOTH_PROJECT = new HiveJoinProjectTransposeRule( diff --git ql/src/test/queries/clientpositive/join_reorder5.q ql/src/test/queries/clientpositive/join_reorder5.q new file mode 100644 index 0000000000..08269ea090 --- /dev/null +++ ql/src/test/queries/clientpositive/join_reorder5.q @@ -0,0 +1,29 @@ +-- Project shouldn't contain unnecessary fields in CBO plan. +-- Instead of +-- HiveProject(wr_order_number=[$0], wr_returned_time_sk=[$1], wr_return_quantity=[$2], BLOCK__OFFSET__INSIDE__FILE=[$3], INPUT__FILE__NAME=[$4], ROW__ID=[$5]) +-- Project should look like +-- HiveProject(wr_order_number=[$0]) + +create table web_sales ( + ws_order_number int +); + +create table web_returns ( + wr_order_number int, + wr_returned_time_sk timestamp, + wr_return_quantity int +); + +explain cbo +with ws_wh as + (select ws1.ws_order_number + from web_sales ws1,web_returns wr2 + where ws1.ws_order_number = wr2.wr_order_number) +select + ws_order_number +from + web_sales ws1 +where +ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number); diff --git ql/src/test/results/clientpositive/auto_join22.q.out ql/src/test/results/clientpositive/auto_join22.q.out index 5a98716fed..a8932695ad 100644 --- ql/src/test/results/clientpositive/auto_join22.q.out +++ ql/src/test/results/clientpositive/auto_join22.q.out @@ -17,14 +17,14 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:src4 + $hdt$_0:$hdt$_2:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -40,9 +40,9 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:src4 + $hdt$_0:$hdt$_2:src2 TableScan - alias: src4 + alias: src2 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -51,7 +51,7 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -61,7 +61,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src4 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git ql/src/test/results/clientpositive/correlationoptimizer5.q.out ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 2e9e6027ae..1505d7ce96 100644 --- ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -126,24 +126,25 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -166,8 +167,8 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -180,35 +181,35 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -233,25 +234,24 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -274,18 +274,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co1 SELECT b.key, d.val @@ -353,24 +349,25 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -387,25 +384,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -430,7 +426,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1, _col2 Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE Mux Operator Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE @@ -438,12 +434,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col3 Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -460,34 +456,30 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + Mux Operator + Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 Stage: Stage-0 Move Operator @@ -577,13 +569,13 @@ STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:y + $hdt$_1:m Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:y + $hdt$_1:m TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -602,24 +594,24 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -644,7 +636,7 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Stage: Stage-7 @@ -655,17 +647,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -700,7 +692,7 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Stage: Stage-8 @@ -711,17 +703,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -736,35 +728,35 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -774,13 +766,13 @@ STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:m + $hdt$_2:$hdt$_3:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:m + $hdt$_2:$hdt$_3:y TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -799,34 +791,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 74a7aa89e7..7bce5e9667 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -297,12 +297,12 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -353,20 +353,23 @@ STAGE PLANS: Filter Operator predicate: (((_col1 + _col4) > 2.0) or _col2) (type: boolean) Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: t1 filterExpr: (UDFToDouble(key) = 1.0D) (type: boolean) @@ -377,20 +380,13 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -398,15 +394,19 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col5 Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col5 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join22.q.out ql/src/test/results/clientpositive/join22.q.out index ad34bc4310..109779db50 100644 --- ql/src/test/results/clientpositive/join22.q.out +++ ql/src/test/results/clientpositive/join22.q.out @@ -36,7 +36,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) TableScan - alias: src2 + alias: src4 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -80,7 +80,7 @@ STAGE PLANS: Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) TableScan - alias: src4 + alias: src2 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -89,13 +89,13 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out index f063766a1f..37b96adf32 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer3.q.out @@ -27,81 +27,81 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: y + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 5 Map Operator Tree: TableScan alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan - alias: x + alias: y filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -113,19 +113,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -167,7 +163,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -192,7 +188,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -263,81 +259,81 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: y + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 7 + Map 5 Map Operator Tree: TableScan alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan - alias: x + alias: y filterExpr: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -349,19 +345,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col3 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -403,7 +395,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -428,7 +420,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -499,67 +491,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) - Map 4 <- Map 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 2 <- Map 4 (BROADCAST_EDGE) + Map 5 <- Reducer 3 (BROADCAST_EDGE) + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: y - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2) - minReductionHashAggr: 0.974359 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 3 Map Operator Tree: TableScan alias: x @@ -581,7 +519,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: y @@ -602,7 +540,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 6 + 1 Map 4 Statistics: Num rows: 39 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -620,7 +558,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 4 Map Operator Tree: TableScan alias: x @@ -641,7 +579,73 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 + Map 5 + Map Operator Tree: + TableScan + alias: y + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 3 + Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col3) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 39 Data size: 7137 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2) + minReductionHashAggr: 0.974359 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col3 (type: bigint) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -656,22 +660,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/join_reorder5.q.out ql/src/test/results/clientpositive/llap/join_reorder5.q.out new file mode 100644 index 0000000000..5cc02876f2 --- /dev/null +++ ql/src/test/results/clientpositive/llap/join_reorder5.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: create table web_sales ( + ws_order_number int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_sales +POSTHOOK: query: create table web_sales ( + ws_order_number int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_sales +PREHOOK: query: create table web_returns ( + wr_order_number int, + wr_returned_time_sk timestamp, + wr_return_quantity int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_returns +POSTHOOK: query: create table web_returns ( + wr_order_number int, + wr_returned_time_sk timestamp, + wr_return_quantity int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_returns +PREHOOK: query: explain cbo +with ws_wh as + (select ws1.ws_order_number + from web_sales ws1,web_returns wr2 + where ws1.ws_order_number = wr2.wr_order_number) +select + ws_order_number +from + web_sales ws1 +where +ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +PREHOOK: type: QUERY +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +with ws_wh as + (select ws1.ws_order_number + from web_sales ws1,web_returns wr2 + where ws1.ws_order_number = wr2.wr_order_number) +select + ws_order_number +from + web_sales ws1 +where +ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +#### A masked pattern was here #### +CBO PLAN: +HiveProject(ws_order_number=[$0]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[semi]) + HiveProject(ws_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(wr_order_number=[$0]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(wr_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr2]) + diff --git ql/src/test/results/clientpositive/llap/keep_uniform.q.out ql/src/test/results/clientpositive/llap/keep_uniform.q.out index 54d0b5fab6..fba97d5acd 100644 --- ql/src/test/results/clientpositive/llap/keep_uniform.q.out +++ ql/src/test/results/clientpositive/llap/keep_uniform.q.out @@ -437,15 +437,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 14 <- Map 17 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) - Reducer 15 <- Reducer 14 (SIMPLE_EDGE) + Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) + Reducer 15 <- Map 11 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 16 <- Reducer 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Map 19 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -593,6 +594,32 @@ STAGE PLANS: valueColumns: 15:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 17:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 15:int + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 17:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 15:int + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -610,7 +637,7 @@ STAGE PLANS: dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 16 + Map 17 Map Operator Tree: TableScan alias: ws2 @@ -664,7 +691,7 @@ STAGE PLANS: dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 17 + Map 18 Map Operator Tree: TableScan alias: web_returns @@ -682,17 +709,17 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: wr_order_number (type: int) - outputColumnNames: _col13 + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [13] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col13 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col13 (type: int) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator keyColumns: 13:int @@ -716,7 +743,7 @@ STAGE PLANS: dataColumns: wr_returned_date_sk:int, wr_returned_time_sk:int, wr_item_sk:int, wr_refunded_customer_sk:int, wr_refunded_cdemo_sk:int, wr_refunded_hdemo_sk:int, wr_refunded_addr_sk:int, wr_returning_customer_sk:int, wr_returning_cdemo_sk:int, wr_returning_hdemo_sk:int, wr_returning_addr_sk:int, wr_web_page_sk:int, wr_reason_sk:int, wr_order_number:int, wr_return_quantity:int, wr_return_amt:decimal(7,2)/DECIMAL_64, wr_return_tax:decimal(7,2)/DECIMAL_64, wr_return_amt_inc_tax:decimal(7,2)/DECIMAL_64, wr_fee:decimal(7,2)/DECIMAL_64, wr_return_ship_cost:decimal(7,2)/DECIMAL_64, wr_refunded_cash:decimal(7,2)/DECIMAL_64, wr_reversed_charge:decimal(7,2)/DECIMAL_64, wr_account_credit:decimal(7,2)/DECIMAL_64, wr_net_loss:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 18 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -848,16 +875,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -907,26 +924,52 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col13 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col13 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col13 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 15 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + residual filter predicates: {(_col0 <> _col3)} + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int) + outputColumnNames: _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 16 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -953,7 +996,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -964,7 +1007,7 @@ STAGE PLANS: keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/sharedwork.q.out ql/src/test/results/clientpositive/llap/sharedwork.q.out index f8d3b4b2f5..6b551a6ada 100644 --- ql/src/test/results/clientpositive/llap/sharedwork.q.out +++ ql/src/test/results/clientpositive/llap/sharedwork.q.out @@ -633,28 +633,28 @@ WHERE (`t14`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL) POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `p_name` -FROM (SELECT `part`.`p_name`, `part`.`p_type`, `part`.`p_size` + 1 AS `size`, `t5`.`size` AS `size0`, `t5`.`c`, `t5`.`ck`, `t12`.`p_type` AS `p_type0`, `t12`.`size` AS `size1`, `t12`.`$f2` -FROM `default`.`part` -LEFT JOIN (SELECT `t4`.`$f0` AS `size`, `t1`.`$f1` AS `c`, `t1`.`$f2` AS `ck` +OPTIMIZED SQL: SELECT `t`.`p_name` +FROM (SELECT `p_name`, `p_type`, `p_size` + 1 AS `size` +FROM `default`.`part`) AS `t` +LEFT JOIN (SELECT `t5`.`$f0` AS `size`, `t2`.`$f1` AS `c`, `t2`.`$f2` AS `ck` FROM (SELECT `p_size` + 1 AS `$f0`, COUNT(*) AS `$f1`, COUNT(`p_type`) AS `$f2` FROM `default`.`part` WHERE `p_size` IS NOT NULL -GROUP BY `p_size` + 1) AS `t1` +GROUP BY `p_size` + 1) AS `t2` INNER JOIN (SELECT `p_size` + 1 AS `$f0` FROM `default`.`part` WHERE `p_size` IS NOT NULL -GROUP BY `p_size` + 1) AS `t4` ON `t1`.`$f0` = `t4`.`$f0`) AS `t5` ON `part`.`p_size` + 1 = `t5`.`size` -LEFT JOIN (SELECT `t8`.`$f0` AS `p_type`, `t11`.`$f0` AS `size`, TRUE AS `$f2` +GROUP BY `p_size` + 1) AS `t5` ON `t2`.`$f0` = `t5`.`$f0`) AS `t6` ON `t`.`size` = `t6`.`size` +LEFT JOIN (SELECT `t9`.`$f0` AS `p_type`, `t12`.`$f0` AS `size`, TRUE AS `$f2` FROM (SELECT `p_type` AS `$f0`, `p_size` + 1 AS `$f1` FROM `default`.`part` WHERE `p_size` IS NOT NULL AND `p_type` IS NOT NULL -GROUP BY `p_type`, `p_size` + 1) AS `t8` +GROUP BY `p_type`, `p_size` + 1) AS `t9` INNER JOIN (SELECT `p_size` + 1 AS `$f0` FROM `default`.`part` WHERE `p_size` IS NOT NULL -GROUP BY `p_size` + 1) AS `t11` ON `t8`.`$f1` = `t11`.`$f0`) AS `t12` ON `part`.`p_type` = `t12`.`p_type` AND `part`.`p_size` + 1 = `t12`.`size`) AS `t13` -WHERE (`t13`.`$f2` IS NULL OR `t13`.`c` = 0 OR `t13`.`c` IS NULL) AND (`t13`.`ck` < (`t13`.`c` IS NOT TRUE) OR `t13`.`c` = 0 OR `t13`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL OR `t13`.`p_type` IS NULL) AND (`t13`.`p_type` IS NOT NULL OR `t13`.`c` = 0 OR `t13`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL) +GROUP BY `p_size` + 1) AS `t12` ON `t9`.`$f1` = `t12`.`$f0`) AS `t13` ON `t`.`p_type` = `t13`.`p_type` AND `t`.`size` = `t13`.`size` +WHERE (`t13`.`$f2` IS NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL) AND (`t6`.`ck` < (`t6`.`c` IS NOT TRUE) OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL OR `t`.`p_type` IS NULL) AND (`t`.`p_type` IS NOT NULL OR `t6`.`c` = 0 OR `t6`.`c` IS NULL OR `t13`.`$f2` IS NOT NULL) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -664,67 +664,33 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) + Reducer 10 <- Map 8 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 6 <- Map 4 (SIMPLE_EDGE) + Reducer 7 <- Reducer 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: part - filterExpr: (p_size is not null or (p_size is not null and p_type is not null)) (type: boolean) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_size is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (p_size + 1) (type: int), p_type (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col1 (type: bigint), _col2 (type: bigint) - auto parallelism: true - Filter Operator - isSamplingPred: false - predicate: (p_size is not null and p_type is not null) (type: boolean) - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string), (p_size + 1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: int), _col0 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: true + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: string), _col1 (type: string) + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -780,21 +746,59 @@ STAGE PLANS: name: default.part Truncated Path -> Alias: /part [part] - Map 10 + Map 4 Map Operator Tree: TableScan alias: part - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: (p_size is not null or (p_size is not null and p_type is not null)) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false - Reduce Output Operator - key expressions: (p_size + 1) (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: (p_size + 1) (type: int) - Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: p_name (type: string), p_type (type: string), p_size (type: int) - auto parallelism: true + Filter Operator + isSamplingPred: false + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (p_size + 1) (type: int), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col1) + keys: _col0 (type: int) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: bigint), _col2 (type: bigint) + auto parallelism: true + Filter Operator + isSamplingPred: false + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: int), _col0 (type: string) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -850,7 +854,7 @@ STAGE PLANS: name: default.part Truncated Path -> Alias: /part [part] - Map 7 + Map 8 Map Operator Tree: TableScan alias: part @@ -942,7 +946,89 @@ STAGE PLANS: name: default.part Truncated Path -> Alias: /part [part] + Reducer 10 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + tag: 1 + auto parallelism: true Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Position of Big Table: 0 + Statistics: Num rows: 42 Data size: 9890 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 42 Data size: 9890 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col4, _col5, _col8 + Position of Big Table: 0 + Statistics: Num rows: 55 Data size: 12947 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + isSamplingPred: false + predicate: ((_col8 is null or (_col4 = 0L) or _col4 is null) and ((_col5 < _col4 is not true) or (_col4 = 0L) or _col4 is null or _col8 is not null or _col1 is null) and (_col1 is not null or (_col4 = 0L) or _col4 is null or _col8 is not null)) (type: boolean) + Statistics: Num rows: 55 Data size: 12947 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 6655 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 55 Data size: 6655 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 5 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -981,76 +1067,7 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: bigint), _col2 (type: bigint) auto parallelism: true - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 (p_size + 1) (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col13, _col14 - Position of Big Table: 0 - Statistics: Num rows: 32 Data size: 7600 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string), (_col5 + 1) (type: int) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col4 (type: string), (_col5 + 1) (type: int) - Statistics: Num rows: 32 Data size: 7600 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col1 (type: string), _col13 (type: bigint), _col14 (type: bigint) - auto parallelism: true - Reducer 4 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col4 (type: string), (_col5 + 1) (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col1, _col4, _col13, _col14, _col17 - Position of Big Table: 0 - Statistics: Num rows: 39 Data size: 9231 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col4 (type: string), _col13 (type: bigint), _col14 (type: bigint), _col17 (type: boolean) - outputColumnNames: _col0, _col1, _col4, _col5, _col8 - Statistics: Num rows: 39 Data size: 9387 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - isSamplingPred: false - predicate: ((_col8 is null or (_col4 = 0L) or _col4 is null) and ((_col5 < _col4 is not true) or (_col4 = 0L) or _col4 is null or _col8 is not null or _col1 is null) and (_col1 is not null or (_col4 = 0L) or _col4 is null or _col8 is not null)) (type: boolean) - Statistics: Num rows: 39 Data size: 9387 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 39 Data size: 4719 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 39 Data size: 4719 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Needs Tagging: false Reduce Operator Tree: @@ -1072,7 +1089,7 @@ STAGE PLANS: tag: 0 value expressions: _col0 (type: string) auto parallelism: true - Reducer 6 + Reducer 7 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1098,23 +1115,6 @@ STAGE PLANS: tag: 1 value expressions: _col2 (type: boolean) auto parallelism: true - Reducer 9 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/subquery_select.q.out ql/src/test/results/clientpositive/llap/subquery_select.q.out index 311cee743d..e578246f0e 100644 --- ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -4385,7 +4385,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### true -Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select o.p_size, (select count(distinct p_type) from part p where p.p_partkey = o.p_partkey) tmp FROM part o right join (select * from part where p_size > (select avg(p_size) from part)) t on t.p_partkey = o.p_partkey PREHOOK: type: QUERY @@ -4405,14 +4405,36 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (XPROD_EDGE), Reducer 2 (XPROD_EDGE) + Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(p_size), count(p_size) + minReductionHashAggr: 0.96153843 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 Map Operator Tree: TableScan alias: o @@ -4446,28 +4468,6 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_size (type: int) - outputColumnNames: p_size - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(p_size), count(p_size) - minReductionHashAggr: 0.96153843 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs Map 7 Map Operator Tree: TableScan @@ -4492,36 +4492,74 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 is not null and UDFToDouble(_col0) is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (UDFToDouble(_col0) / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Right Outer Join 0 to 1 + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 > _col2)} + Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col3, _col4 Statistics: Num rows: 16 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col3 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 16 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 3 + value expressions: _col4 (type: int) + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col3 (type: int) 1 _col2 (type: int) - outputColumnNames: _col1, _col3, _col4 + outputColumnNames: _col4, _col5, _col6 Statistics: Num rows: 32 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int), CASE WHEN (_col4 is null) THEN (0L) ELSE (_col3) END (type: bigint) + expressions: _col4 (type: int), CASE WHEN (_col6 is null) THEN (0L) ELSE (_col5) END (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 32 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -4531,48 +4569,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 > _col2)} - Statistics: Num rows: 8 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 is not null and UDFToDouble(_col0) is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (UDFToDouble(_col0) / _col1) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: @@ -4605,7 +4601,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[55][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select o.p_size, (select count(distinct p_type) from part p where p.p_partkey = o.p_partkey) tmp FROM part o right join (select * from part where p_size > (select avg(p_size) from part)) t on t.p_partkey = o.p_partkey PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out index 26a98ffcec..912855402a 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query2.q.out @@ -126,8 +126,8 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC]) - HiveProject(d_week_seq1=[$0], _o__c1=[round(/($1, $10), 2)], _o__c2=[round(/($2, $11), 2)], _o__c3=[round(/($3, $12), 2)], _o__c4=[round(/($4, $13), 2)], _o__c5=[round(/($5, $14), 2)], _o__c6=[round(/($6, $15), 2)], _o__c7=[round(/($7, $16), 2)]) - HiveJoin(condition=[=($0, -($9, 53))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_week_seq1=[$9], _o__c1=[round(/($10, $1), 2)], _o__c2=[round(/($11, $2), 2)], _o__c3=[round(/($12, $3), 2)], _o__c4=[round(/($13, $4), 2)], _o__c5=[round(/($14, $5), 2)], _o__c6=[round(/($15, $6), 2)], _o__c7=[round(/($16, $7), 2)]) + HiveJoin(condition=[=($9, -($0, 53))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) @@ -145,9 +145,9 @@ HiveSortLimit(sort0=[$0], dir0=[ASC]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq2=[$0], sun_sales2=[$1], mon_sales2=[$2], tue_sales2=[$3], wed_sales2=[$4], thu_sales2=[$5], fri_sales2=[$6], sat_sales2=[$7]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) @@ -165,6 +165,6 @@ HiveSortLimit(sort0=[$0], dir0=[ASC]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($4))]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out index abc5d999b5..b52967956f 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out @@ -94,31 +94,31 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) - HiveProject(s_store_name1=[$2], s_store_id1=[$1], d_week_seq1=[$3], _o__c3=[/($5, $15)], _o__c4=[/($6, $16)], _o__c5=[/($7, $7)], _o__c6=[/($8, $17)], _o__c7=[/($9, $18)], _o__c8=[/($10, $19)], _o__c9=[/($11, $20)]) - HiveJoin(condition=[AND(=($1, $14), =($3, -($13, 52)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) - HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d]) - HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveProject(s_store_name1=[$4], s_store_id1=[$3], d_week_seq1=[$5], _o__c3=[/($7, $17)], _o__c4=[/($8, $18)], _o__c5=[/($9, $9)], _o__c6=[/($10, $19)], _o__c7=[/($11, $20)], _o__c8=[/($12, $21)], _o__c9=[/($13, $22)]) + HiveJoin(condition=[AND(=($3, $1), =($16, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($3, -($13, 52))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out index 218ca7d8b6..85462a4051 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query95.q.out @@ -76,19 +76,18 @@ CBO PLAN: HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[sum($8)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_order_number=[$0]) - HiveAggregate(group=[{14}]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_order_number=[$1]) - HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveFilter(condition=[IS NOT NULL($17)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveFilter(condition=[IS NOT NULL($17)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) - HiveProject(wr_returned_date_sk=[$0], wr_returned_time_sk=[$1], wr_item_sk=[$2], wr_refunded_customer_sk=[$3], wr_refunded_cdemo_sk=[$4], wr_refunded_hdemo_sk=[$5], wr_refunded_addr_sk=[$6], wr_returning_customer_sk=[$7], wr_returning_cdemo_sk=[$8], wr_returning_hdemo_sk=[$9], wr_returning_addr_sk=[$10], wr_web_page_sk=[$11], wr_reason_sk=[$12], wr_order_number=[$13], wr_return_quantity=[$14], wr_return_amt=[$15], wr_return_tax=[$16], wr_return_amt_inc_tax=[$17], wr_fee=[$18], wr_return_ship_cost=[$19], wr_refunded_cash=[$20], wr_reversed_charge=[$21], wr_account_credit=[$22], wr_net_loss=[$23], BLOCK__OFFSET__INSIDE__FILE=[$24], INPUT__FILE__NAME=[$25], ROW__ID=[$26]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveAggregate(group=[{4}]) + HiveJoin(condition=[AND(=($3, $1), <>($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveFilter(condition=[IS NOT NULL($17)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(wr_order_number=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_order_number=[$0]) HiveAggregate(group=[{1}]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out index eaa1defa81..f2eb090488 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1203][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1210][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1172][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[1179][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1186][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain cbo with cross_items as (select i_item_sk ss_item_sk @@ -230,67 +230,66 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveFilter(condition=[IS NOT NULL($3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$13], $f1=[$14], $f2=[$15], $f3=[*(CAST($6):DECIMAL(10, 0), $7)]) + HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $9), =($2, $10), =($3, $11))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_quantity=[$10], ss_list_price=[$12]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) HiveProject($f0=[$0], $f1=[$1]) @@ -326,67 +325,66 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveFilter(condition=[IS NOT NULL($3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$13], $f1=[$14], $f2=[$15], $f3=[*(CAST($6):DECIMAL(10, 0), $7)]) + HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $9), =($2, $10), =($3, $11))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15], cs_quantity=[$18], cs_list_price=[$20]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) HiveProject($f0=[$0], $f1=[$1]) @@ -422,67 +420,66 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) HiveFilter(condition=[IS NOT NULL($3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($3)], agg#1=[count()]) - HiveProject($f0=[$1], $f1=[$2], $f2=[$3], $f3=[*(CAST($7):DECIMAL(10, 0), $8)]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$13], $f1=[$14], $f2=[$15], $f3=[*(CAST($6):DECIMAL(10, 0), $7)]) + HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $9), =($2, $10), =($3, $11))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) + HiveFilter(condition=[=($3, 3)]) + HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) + HiveUnion(all=[true]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iss]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[ics]) + HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) + HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) + HiveTableScan(table=[[default, item]], table:alias=[iws]) HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0]) - HiveJoin(condition=[AND(=($1, $4), =($2, $5), =($3, $6))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) - HiveFilter(condition=[=($3, 3)]) - HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) - HiveUnion(all=[true]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iss]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_item_sk=[$15]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[ics]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $6, 1999, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(i_item_sk=[$0], i_brand_id=[$7], i_class_id=[$9], i_category_id=[$11]) - HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($7), IS NOT NULL($9))]) - HiveTableScan(table=[[default, item]], table:alias=[iws]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_quantity=[$18], ws_list_price=[$20]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 11))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject($f0=[CAST(/($0, $1)):DECIMAL(22, 6)]) HiveFilter(condition=[IS NOT NULL(CAST(/($0, $1)):DECIMAL(22, 6))]) HiveProject($f0=[$0], $f1=[$1]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out index 4c90da4476..9669e112f1 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query2.q.out @@ -126,8 +126,8 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC]) - HiveProject(d_week_seq1=[$0], _o__c1=[round(/($1, $10), 2)], _o__c2=[round(/($2, $11), 2)], _o__c3=[round(/($3, $12), 2)], _o__c4=[round(/($4, $13), 2)], _o__c5=[round(/($5, $14), 2)], _o__c6=[round(/($6, $15), 2)], _o__c7=[round(/($7, $16), 2)]) - HiveJoin(condition=[=($0, -($9, 53))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_week_seq1=[$9], _o__c1=[round(/($10, $1), 2)], _o__c2=[round(/($11, $2), 2)], _o__c3=[round(/($12, $3), 2)], _o__c4=[round(/($13, $4), 2)], _o__c5=[round(/($14, $5), 2)], _o__c6=[round(/($15, $6), 2)], _o__c7=[round(/($16, $7), 2)]) + HiveJoin(condition=[=($9, -($0, 53))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) @@ -145,9 +145,9 @@ HiveSortLimit(sort0=[$0], dir0=[ASC]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) + HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq2=[$0], sun_sales2=[$1], mon_sales2=[$2], tue_sales2=[$3], wed_sales2=[$4], thu_sales2=[$5], fri_sales2=[$6], sat_sales2=[$7]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)], agg#3=[sum($4)], agg#4=[sum($5)], agg#5=[sum($6)], agg#6=[sum($7)]) @@ -165,6 +165,6 @@ HiveSortLimit(sort0=[$0], dir0=[ASC]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($6, 2002), IS NOT NULL($4))]) + HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out index 8d17cc79d1..ba1ea84296 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query59.q.out @@ -94,32 +94,18 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) - HiveProject(s_store_name1=[$2], s_store_id1=[$1], d_week_seq1=[$3], _o__c3=[/($5, $15)], _o__c4=[/($6, $16)], _o__c5=[/($7, $7)], _o__c6=[/($8, $17)], _o__c7=[/($9, $18)], _o__c8=[/($10, $19)], _o__c9=[/($11, $20)]) - HiveJoin(condition=[AND(=($1, $14), =($3, -($13, 52)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) - HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d]) - HiveProject(d_week_seq2=[$2], s_store_id2=[$1], sun_sales2=[$4], mon_sales2=[$5], wed_sales2=[$6], thu_sales2=[$7], fri_sales2=[$8], sat_sales2=[$9]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_name1=[$2], s_store_id1=[$1], d_week_seq1=[$5], _o__c3=[/($7, $17)], _o__c4=[/($8, $18)], _o__c5=[/($9, $9)], _o__c6=[/($10, $19)], _o__c7=[/($11, $20)], _o__c8=[/($12, $21)], _o__c9=[/($13, $22)]) + HiveJoin(condition=[AND(=($5, -($15, 52)), =($16, $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) - HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8], d_week_seq=[$9]) + HiveJoin(condition=[=($9, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], $f8=[$8]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)], agg#4=[sum($6)], agg#5=[sum($7)], agg#6=[sum($8)]) HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) @@ -129,6 +115,21 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1185, 1196), IS NOT NULL($4))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7], d_week_seq=[$8]) + HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5], $f6=[$6], $f7=[$7]) + HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($5)], agg#3=[sum($6)], agg#4=[sum($7)], agg#5=[sum($8)]) + HiveProject($f0=[$4], $f1=[$1], $f2=[CASE($5, $2, null:DECIMAL(7, 2))], $f3=[CASE($6, $2, null:DECIMAL(7, 2))], $f4=[CASE($7, $2, null:DECIMAL(7, 2))], $f5=[CASE($8, $2, null:DECIMAL(7, 2))], $f6=[CASE($9, $2, null:DECIMAL(7, 2))], $f7=[CASE($10, $2, null:DECIMAL(7, 2))], $f8=[CASE($11, $2, null:DECIMAL(7, 2))]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_week_seq=[$4], ==[=($14, _UTF-16LE'Sunday')], =3=[=($14, _UTF-16LE'Monday')], =4=[=($14, _UTF-16LE'Tuesday')], =5=[=($14, _UTF-16LE'Wednesday')], =6=[=($14, _UTF-16LE'Thursday')], =7=[=($14, _UTF-16LE'Friday')], =8=[=($14, _UTF-16LE'Saturday')]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(BETWEEN(false, $3, 1197, 1208), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out index ace074316b..2dc727c45f 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out @@ -76,15 +76,15 @@ CBO PLAN: HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[sum($8)]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_order_number=[$0]) - HiveAggregate(group=[{14}]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_order_number=[$1]) - HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveAggregate(group=[{4}]) + HiveJoin(condition=[AND(=($3, $1), <>($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(wr_order_number=[$13]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_order_number=[$0]) HiveAggregate(group=[{1}]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out index 8204245245..52c2121e68 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1196][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1203][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1210][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1172][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[1179][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1186][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -222,840 +222,876 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 11 (BROADCAST_EDGE) -Map 24 <- Reducer 40 (BROADCAST_EDGE) -Map 63 <- Reducer 46 (BROADCAST_EDGE) -Map 64 <- Reducer 52 (BROADCAST_EDGE) -Map 66 <- Reducer 56 (BROADCAST_EDGE) -Map 67 <- Reducer 72 (BROADCAST_EDGE) -Map 73 <- Reducer 78 (BROADCAST_EDGE) -Map 79 <- Reducer 17 (BROADCAST_EDGE) -Map 80 <- Reducer 23 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 79 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 14 <- Map 65 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 59 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 17 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 10 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Map 65 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE), Reducer 62 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 23 <- Map 10 (CUSTOM_SIMPLE_EDGE) -Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) -Reducer 26 <- Map 65 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Union 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 30 <- Map 65 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Reducer 26 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 33 <- Union 32 (SIMPLE_EDGE) -Reducer 34 <- Map 65 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 35 <- Reducer 26 (SIMPLE_EDGE), Union 36 (CONTAINS) -Reducer 37 <- Union 36 (SIMPLE_EDGE) -Reducer 38 <- Map 65 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE) -Reducer 4 <- Map 65 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 63 (SIMPLE_EDGE) -Reducer 42 <- Map 65 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 43 <- Reducer 42 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 44 <- Reducer 42 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 45 <- Reducer 42 (SIMPLE_EDGE), Union 36 (CONTAINS) -Reducer 46 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 47 <- Map 39 (SIMPLE_EDGE), Map 64 (SIMPLE_EDGE) -Reducer 48 <- Map 65 (SIMPLE_EDGE), Reducer 47 (SIMPLE_EDGE) -Reducer 49 <- Reducer 48 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Reducer 48 (SIMPLE_EDGE), Union 32 (CONTAINS) -Reducer 51 <- Reducer 48 (SIMPLE_EDGE), Union 36 (CONTAINS) -Reducer 52 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 53 <- Map 39 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 54 (CONTAINS) -Reducer 55 <- Union 54 (CUSTOM_SIMPLE_EDGE) -Reducer 56 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 57 <- Map 39 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 58 (CONTAINS) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Map 49 <- Reducer 52 (BROADCAST_EDGE) +Map 67 <- Reducer 54 (BROADCAST_EDGE) +Map 68 <- Reducer 56 (BROADCAST_EDGE) +Map 69 <- Reducer 60 (BROADCAST_EDGE) +Map 70 <- Reducer 75 (BROADCAST_EDGE) +Map 76 <- Reducer 81 (BROADCAST_EDGE) +Map 82 <- Reducer 19 (BROADCAST_EDGE) +Map 83 <- Reducer 26 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 11 (SIMPLE_EDGE), Map 82 (SIMPLE_EDGE) +Reducer 14 <- Map 27 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) +Reducer 16 <- Map 27 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE), Reducer 63 (CUSTOM_SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 19 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 11 (SIMPLE_EDGE), Map 83 (SIMPLE_EDGE) +Reducer 21 <- Map 27 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 23 <- Map 27 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Reducer 24 (CUSTOM_SIMPLE_EDGE), Reducer 66 (CUSTOM_SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 26 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Union 30 (CONTAINS) +Reducer 3 <- Map 27 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 31 <- Union 30 (SIMPLE_EDGE) +Reducer 32 <- Map 27 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 33 <- Reducer 32 (SIMPLE_EDGE), Union 30 (CONTAINS) +Reducer 34 <- Map 27 (SIMPLE_EDGE), Reducer 55 (SIMPLE_EDGE) +Reducer 35 <- Reducer 34 (SIMPLE_EDGE), Union 30 (CONTAINS) +Reducer 36 <- Map 27 (SIMPLE_EDGE), Reducer 50 (SIMPLE_EDGE) +Reducer 37 <- Reducer 36 (SIMPLE_EDGE), Union 38 (CONTAINS) +Reducer 39 <- Union 38 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 40 <- Reducer 36 (SIMPLE_EDGE), Union 41 (CONTAINS) +Reducer 42 <- Union 41 (SIMPLE_EDGE) +Reducer 43 <- Map 27 (SIMPLE_EDGE), Reducer 53 (SIMPLE_EDGE) +Reducer 44 <- Reducer 43 (SIMPLE_EDGE), Union 38 (CONTAINS) +Reducer 45 <- Reducer 43 (SIMPLE_EDGE), Union 41 (CONTAINS) +Reducer 46 <- Map 27 (SIMPLE_EDGE), Reducer 55 (SIMPLE_EDGE) +Reducer 47 <- Reducer 46 (SIMPLE_EDGE), Union 38 (CONTAINS) +Reducer 48 <- Reducer 46 (SIMPLE_EDGE), Union 41 (CONTAINS) +Reducer 5 <- Map 27 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 52 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 51 (SIMPLE_EDGE), Map 67 (SIMPLE_EDGE) +Reducer 54 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 55 <- Map 51 (SIMPLE_EDGE), Map 68 (SIMPLE_EDGE) +Reducer 56 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 57 <- Map 51 (SIMPLE_EDGE), Map 69 (SIMPLE_EDGE), Union 58 (CONTAINS) Reducer 59 <- Union 58 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 55 (CUSTOM_SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 60 <- Map 39 (SIMPLE_EDGE), Map 66 (SIMPLE_EDGE), Union 61 (CONTAINS) -Reducer 62 <- Union 61 (CUSTOM_SIMPLE_EDGE) -Reducer 68 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 54 (CONTAINS) -Reducer 69 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 58 (CONTAINS) -Reducer 70 <- Map 67 (SIMPLE_EDGE), Map 71 (SIMPLE_EDGE), Union 61 (CONTAINS) -Reducer 72 <- Map 71 (CUSTOM_SIMPLE_EDGE) -Reducer 74 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 54 (CONTAINS) -Reducer 75 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 58 (CONTAINS) -Reducer 76 <- Map 73 (SIMPLE_EDGE), Map 77 (SIMPLE_EDGE), Union 61 (CONTAINS) -Reducer 78 <- Map 77 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 60 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 61 <- Map 51 (SIMPLE_EDGE), Map 69 (SIMPLE_EDGE), Union 62 (CONTAINS) +Reducer 63 <- Union 62 (CUSTOM_SIMPLE_EDGE) +Reducer 64 <- Map 51 (SIMPLE_EDGE), Map 69 (SIMPLE_EDGE), Union 65 (CONTAINS) +Reducer 66 <- Union 65 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 59 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 71 <- Map 70 (SIMPLE_EDGE), Map 74 (SIMPLE_EDGE), Union 58 (CONTAINS) +Reducer 72 <- Map 70 (SIMPLE_EDGE), Map 74 (SIMPLE_EDGE), Union 62 (CONTAINS) +Reducer 73 <- Map 70 (SIMPLE_EDGE), Map 74 (SIMPLE_EDGE), Union 65 (CONTAINS) +Reducer 75 <- Map 74 (CUSTOM_SIMPLE_EDGE) +Reducer 77 <- Map 76 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE), Union 58 (CONTAINS) +Reducer 78 <- Map 76 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE), Union 62 (CONTAINS) +Reducer 79 <- Map 76 (SIMPLE_EDGE), Map 80 (SIMPLE_EDGE), Union 65 (CONTAINS) +Reducer 81 <- Map 80 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_1366] - Limit [LIM_1365] (rows=100 width=223) + Reducer 10 vectorized + File Output Operator [FS_1345] + Limit [LIM_1344] (rows=100 width=220) Number of rows:100 - Select Operator [SEL_1364] (rows=304320 width=222) + Select Operator [SEL_1343] (rows=205 width=220) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1363] - Select Operator [SEL_1362] (rows=304320 width=222) + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1342] + Select Operator [SEL_1341] (rows=205 width=220) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_1361] (rows=304320 width=230) + Group By Operator [GBY_1340] (rows=205 width=228) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_1209] + <-Union 8 [SIMPLE_EDGE] + <-Reducer 18 [CONTAINS] + Reduce Output Operator [RS_1185] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1208] (rows=304320 width=230) + Group By Operator [GBY_1184] (rows=205 width=228) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1207] (rows=121728 width=220) + Top N Key Operator [TNK_1183] (rows=82 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1205] (rows=40576 width=222) + Select Operator [SEL_1181] (rows=24 width=222) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1204] (rows=40576 width=243) + Filter Operator [FIL_1180] (rows=24 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1203] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1179] (rows=72 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1378] - Filter Operator [FIL_1377] (rows=121728 width=131) + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1357] + Filter Operator [FIL_1356] (rows=72 width=131) predicate:_col3 is not null - Group By Operator [GBY_1376] (rows=121728 width=131) + Group By Operator [GBY_1355] (rows=72 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_238] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_236] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_237] (rows=486912 width=131) + Group By Operator [GBY_235] (rows=72 width=131) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_235] (rows=7790806 width=106) + Select Operator [SEL_233] (rows=12217 width=10) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1176] (rows=7790806 width=106) - Conds:RS_232._col1=RS_1344._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1344] + Merge Join Operator [MERGEJOIN_1152] (rows=12217 width=10) + Conds:RS_230._col1=RS_1317._col0(Inner),Output:["_col2","_col3","_col13","_col14","_col15"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1317] PartitionCols:_col0 - Select Operator [SEL_1335] (rows=462000 width=15) + Select Operator [SEL_1308] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_81] (rows=462000 width=15) + TableScan [TS_6] (rows=462000 width=15) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_232] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_230] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1175] (rows=7790806 width=98) - Conds:RS_229._col1=RS_230._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_229] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1167] (rows=7790806 width=98) - Conds:RS_1371._col0=RS_1311._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1311] + Merge Join Operator [MERGEJOIN_1151] (rows=12217 width=4) + Conds:RS_227._col6, _col7, _col8=RS_1354._col0, _col1, _col2(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_227] + PartitionCols:_col6, _col7, _col8 + Merge Join Operator [MERGEJOIN_1144] (rows=7733674 width=110) + Conds:RS_224._col1=RS_1324._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1324] PartitionCols:_col0 - Select Operator [SEL_1308] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_1307] (rows=50 width=12) - predicate:((d_year = 2000) and (d_moy = 11)) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 79 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1371] - PartitionCols:_col0 - Select Operator [SEL_1370] (rows=286549727 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1369] (rows=286549727 width=123) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_227_date_dim_d_date_sk_min) AND DynamicValue(RS_227_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_227_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_143] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1368] - Group By Operator [GBY_1367] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1319] - Group By Operator [GBY_1316] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1312] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1308] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_230] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1174] (rows=724 width=4) - Conds:RS_1352._col1, _col2, _col3=RS_1375._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1352] - PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1345] (rows=458612 width=15) + Select Operator [SEL_1313] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1336] (rows=458612 width=15) + Filter Operator [FIL_1304] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1375] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1374] (rows=1 width=12) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1373] (rows=1 width=20) - predicate:(_col3 = 3L) - Group By Operator [GBY_1372] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 32 [SIMPLE_EDGE] - <-Reducer 31 [CONTAINS] vectorized - Reduce Output Operator [RS_1429] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1428] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1427] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_169] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_25] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1156] (rows=14628613 width=11) - Conds:RS_21._col1=RS_1349._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1349] + Please refer to the previous TableScan [TS_6] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_224] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1143] (rows=7790806 width=98) + Conds:RS_1350._col0=RS_1287._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1287] + PartitionCols:_col0 + Select Operator [SEL_1284] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_1283] (rows=50 width=12) + predicate:((d_year = 2000) and (d_moy = 11)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 82 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1350] + PartitionCols:_col0 + Select Operator [SEL_1349] (rows=286549727 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1348] (rows=286549727 width=123) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_222_date_dim_d_date_sk_min) AND DynamicValue(RS_222_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_222_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_142] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1347] + Group By Operator [GBY_1346] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1295] + Group By Operator [GBY_1292] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1288] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1284] + <-Reducer 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1354] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_1353] (rows=1 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1352] (rows=1 width=20) + predicate:(_col3 = 3L) + Group By Operator [GBY_1351] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 38 [SIMPLE_EDGE] + <-Reducer 37 [CONTAINS] vectorized + Reduce Output Operator [RS_1424] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1423] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1422] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_168] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_167] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1146] (rows=14628613 width=11) + Conds:RS_163._col1=RS_1325._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1325] + PartitionCols:_col0 + Select Operator [SEL_1314] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1305] (rows=458612 width=15) + predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 50 [SIMPLE_EDGE] + SHUFFLE [RS_163] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1132] (rows=14736682 width=4) + Conds:RS_1402._col0=RS_1380._col0(Inner),Output:["_col1"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1380] PartitionCols:_col0 - Select Operator [SEL_1341] (rows=458612 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1332] (rows=458612 width=15) - predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1155] (rows=14736682 width=4) - Conds:RS_1423._col0=RS_1401._col0(Inner),Output:["_col1"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1401] - PartitionCols:_col0 - Select Operator [SEL_1400] (rows=1957 width=4) - Output:["_col0"] - Filter Operator [FIL_1399] (rows=1957 width=8) - predicate:d_year BETWEEN 1999 AND 2001 - TableScan [TS_12] (rows=73049 width=8) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1423] - PartitionCols:_col0 - Select Operator [SEL_1422] (rows=550076554 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_1421] (rows=550076554 width=7) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_d1_d_date_sk_min) AND DynamicValue(RS_19_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_d1_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=7) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] - <-Reducer 40 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1420] - Group By Operator [GBY_1419] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1415] - Group By Operator [GBY_1411] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1402] (rows=1957 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 44 [CONTAINS] vectorized - Reduce Output Operator [RS_1443] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1442] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1441] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_189] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_45] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1158] (rows=7620440 width=11) - Conds:RS_41._col1=RS_1350._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1350] + Select Operator [SEL_1379] (rows=1957 width=4) + Output:["_col0"] + Filter Operator [FIL_1378] (rows=1957 width=8) + predicate:d_year BETWEEN 1999 AND 2001 + TableScan [TS_12] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1402] PartitionCols:_col0 - Select Operator [SEL_1342] (rows=458612 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1333] (rows=458612 width=15) - predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1157] (rows=7676736 width=4) - Conds:RS_1437._col0=RS_1403._col0(Inner),Output:["_col1"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1403] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 63 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1437] - PartitionCols:_col0 - Select Operator [SEL_1436] (rows=286549727 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_1435] (rows=286549727 width=7) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_39_d2_d_date_sk_min) AND DynamicValue(RS_39_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_d2_d_date_sk_bloom_filter))) - TableScan [TS_29] (rows=287989836 width=7) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] - <-Reducer 46 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1434] - Group By Operator [GBY_1433] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1416] - Group By Operator [GBY_1412] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1404] (rows=1957 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 50 [CONTAINS] vectorized - Reduce Output Operator [RS_1457] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1456] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1455] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_210] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_66] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1160] (rows=3828623 width=11) - Conds:RS_62._col1=RS_1351._col0(Inner),Output:["_col4","_col5","_col6"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1351] + Select Operator [SEL_1401] (rows=550076554 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1400] (rows=550076554 width=7) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_d1_d_date_sk_min) AND DynamicValue(RS_19_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_d1_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=7) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1399] + Group By Operator [GBY_1398] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1394] + Group By Operator [GBY_1390] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1381] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1379] + <-Reducer 44 [CONTAINS] vectorized + Reduce Output Operator [RS_1430] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1429] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1428] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_188] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_187] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1148] (rows=7620440 width=11) + Conds:RS_183._col1=RS_1326._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1326] + PartitionCols:_col0 + Select Operator [SEL_1315] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1306] (rows=458612 width=15) + predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_183] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1134] (rows=7676736 width=4) + Conds:RS_1410._col0=RS_1382._col0(Inner),Output:["_col1"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1382] PartitionCols:_col0 - Select Operator [SEL_1343] (rows=458612 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1334] (rows=458612 width=15) - predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 47 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1159] (rows=3856907 width=4) - Conds:RS_1451._col0=RS_1405._col0(Inner),Output:["_col1"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1405] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 64 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1451] - PartitionCols:_col0 - Select Operator [SEL_1450] (rows=143966864 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_1449] (rows=143966864 width=7) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_60_d3_d_date_sk_min) AND DynamicValue(RS_60_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_60_d3_d_date_sk_bloom_filter))) - TableScan [TS_50] (rows=144002668 width=7) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] - <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1448] - Group By Operator [GBY_1447] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1417] - Group By Operator [GBY_1413] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1406] (rows=1957 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 59 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1382] - Select Operator [SEL_1381] (rows=1 width=112) + Please refer to the previous Select Operator [SEL_1379] + <-Map 67 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1410] + PartitionCols:_col0 + Select Operator [SEL_1409] (rows=286549727 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1408] (rows=286549727 width=7) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_39_d2_d_date_sk_min) AND DynamicValue(RS_39_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_d2_d_date_sk_bloom_filter))) + TableScan [TS_29] (rows=287989836 width=7) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] + <-Reducer 54 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1407] + Group By Operator [GBY_1406] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1395] + Group By Operator [GBY_1391] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1383] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1379] + <-Reducer 47 [CONTAINS] vectorized + Reduce Output Operator [RS_1436] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1435] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1434] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_209] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_208] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1150] (rows=3828623 width=11) + Conds:RS_204._col1=RS_1327._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1327] + PartitionCols:_col0 + Select Operator [SEL_1316] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1307] (rows=458612 width=15) + predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_204] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1136] (rows=3856907 width=4) + Conds:RS_1418._col0=RS_1384._col0(Inner),Output:["_col1"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1384] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1379] + <-Map 68 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1418] + PartitionCols:_col0 + Select Operator [SEL_1417] (rows=143966864 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_1416] (rows=143966864 width=7) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_60_d3_d_date_sk_min) AND DynamicValue(RS_60_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_60_d3_d_date_sk_bloom_filter))) + TableScan [TS_50] (rows=144002668 width=7) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] + <-Reducer 56 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1415] + Group By Operator [GBY_1414] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1396] + Group By Operator [GBY_1392] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1385] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1379] + <-Reducer 63 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1361] + Select Operator [SEL_1360] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1380] (rows=1 width=120) + Filter Operator [FIL_1359] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1379] (rows=1 width=120) + Group By Operator [GBY_1358] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 58 [CUSTOM_SIMPLE_EDGE] - <-Reducer 57 [CONTAINS] - Reduce Output Operator [RS_1264] - Group By Operator [GBY_1263] (rows=1 width=120) + <-Union 62 [CUSTOM_SIMPLE_EDGE] + <-Reducer 61 [CONTAINS] + Reduce Output Operator [RS_1240] + Group By Operator [GBY_1239] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1262] (rows=26270325 width=44) + Select Operator [SEL_1238] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1260] (rows=14736682 width=0) + Select Operator [SEL_1236] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1259] (rows=14736682 width=0) - Conds:RS_1466._col0=RS_1409._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1409] + Merge Join Operator [MERGEJOIN_1235] (rows=14736682 width=0) + Conds:RS_1445._col0=RS_1388._col0(Inner),Output:["_col1","_col2"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1388] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1466] + Please refer to the previous Select Operator [SEL_1379] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1445] PartitionCols:_col0 - Select Operator [SEL_1464] (rows=550076554 width=114) + Select Operator [SEL_1443] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1463] (rows=550076554 width=114) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_106_date_dim_d_date_sk_min) AND DynamicValue(RS_106_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_106_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_99] (rows=575995635 width=114) + Filter Operator [FIL_1442] (rows=550076554 width=114) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_105_date_dim_d_date_sk_min) AND DynamicValue(RS_105_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_105_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_98] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] - <-Reducer 56 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1462] - Group By Operator [GBY_1461] (rows=1 width=12) + <-Reducer 60 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1441] + Group By Operator [GBY_1440] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1418] - Group By Operator [GBY_1414] (rows=1 width=12) + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1397] + Group By Operator [GBY_1393] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1408] (rows=1957 width=4) + Select Operator [SEL_1387] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1400] - <-Reducer 69 [CONTAINS] - Reduce Output Operator [RS_1282] - Group By Operator [GBY_1281] (rows=1 width=120) + Please refer to the previous Select Operator [SEL_1379] + <-Reducer 72 [CONTAINS] + Reduce Output Operator [RS_1258] + Group By Operator [GBY_1257] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1280] (rows=26270325 width=44) + Select Operator [SEL_1256] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1278] (rows=7676736 width=94) + Select Operator [SEL_1254] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1277] (rows=7676736 width=94) - Conds:RS_1481._col0=RS_1472._col0(Inner),Output:["_col1","_col2"] - <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1472] + Merge Join Operator [MERGEJOIN_1253] (rows=7676736 width=94) + Conds:RS_1460._col0=RS_1451._col0(Inner),Output:["_col1","_col2"] + <-Map 74 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1451] PartitionCols:_col0 - Select Operator [SEL_1469] (rows=1957 width=4) + Select Operator [SEL_1448] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1468] (rows=1957 width=8) + Filter Operator [FIL_1447] (rows=1957 width=8) predicate:d_year BETWEEN 1998 AND 2000 - TableScan [TS_112] (rows=73049 width=8) + TableScan [TS_111] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1481] + <-Map 70 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1460] PartitionCols:_col0 - Select Operator [SEL_1479] (rows=286549727 width=119) + Select Operator [SEL_1458] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1478] (rows=286549727 width=119) - predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_116_date_dim_d_date_sk_min) AND DynamicValue(RS_116_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_116_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_109] (rows=287989836 width=119) + Filter Operator [FIL_1457] (rows=286549727 width=119) + predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_115_date_dim_d_date_sk_min) AND DynamicValue(RS_115_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_115_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_108] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] - <-Reducer 72 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1477] - Group By Operator [GBY_1476] (rows=1 width=12) + <-Reducer 75 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1456] + Group By Operator [GBY_1455] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 71 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1475] - Group By Operator [GBY_1474] (rows=1 width=12) + <-Map 74 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1454] + Group By Operator [GBY_1453] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1471] (rows=1957 width=4) + Select Operator [SEL_1450] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1469] - <-Reducer 75 [CONTAINS] - Reduce Output Operator [RS_1300] - Group By Operator [GBY_1299] (rows=1 width=120) + Please refer to the previous Select Operator [SEL_1448] + <-Reducer 78 [CONTAINS] + Reduce Output Operator [RS_1276] + Group By Operator [GBY_1275] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1298] (rows=26270325 width=44) + Select Operator [SEL_1274] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1296] (rows=3856907 width=114) + Select Operator [SEL_1272] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1295] (rows=3856907 width=114) - Conds:RS_1496._col0=RS_1487._col0(Inner),Output:["_col1","_col2"] - <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1487] + Merge Join Operator [MERGEJOIN_1271] (rows=3856907 width=114) + Conds:RS_1475._col0=RS_1466._col0(Inner),Output:["_col1","_col2"] + <-Map 80 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1466] PartitionCols:_col0 - Select Operator [SEL_1484] (rows=1957 width=4) + Select Operator [SEL_1463] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1483] (rows=1957 width=8) + Filter Operator [FIL_1462] (rows=1957 width=8) predicate:d_year BETWEEN 1998 AND 2000 - TableScan [TS_123] (rows=73049 width=8) + TableScan [TS_122] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1496] + <-Map 76 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1475] PartitionCols:_col0 - Select Operator [SEL_1494] (rows=143966864 width=119) + Select Operator [SEL_1473] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1493] (rows=143966864 width=119) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_127_date_dim_d_date_sk_min) AND DynamicValue(RS_127_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_127_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_120] (rows=144002668 width=119) + Filter Operator [FIL_1472] (rows=143966864 width=119) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_126_date_dim_d_date_sk_min) AND DynamicValue(RS_126_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_126_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_119] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] - <-Reducer 78 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1492] - Group By Operator [GBY_1491] (rows=1 width=12) + <-Reducer 81 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1471] + Group By Operator [GBY_1470] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 77 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1490] - Group By Operator [GBY_1489] (rows=1 width=12) + <-Map 80 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1469] + Group By Operator [GBY_1468] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1486] (rows=1957 width=4) + Select Operator [SEL_1465] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1484] - <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_1216] + Please refer to the previous Select Operator [SEL_1463] + <-Reducer 25 [CONTAINS] + Reduce Output Operator [RS_1192] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1215] (rows=304320 width=230) + Group By Operator [GBY_1191] (rows=205 width=228) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1214] (rows=121728 width=220) + Top N Key Operator [TNK_1190] (rows=82 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1212] (rows=40576 width=218) + Select Operator [SEL_1188] (rows=13 width=218) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1211] (rows=40576 width=243) + Filter Operator [FIL_1187] (rows=13 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1210] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1186] (rows=40 width=242) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1394] - Filter Operator [FIL_1393] (rows=121728 width=131) + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1373] + Filter Operator [FIL_1372] (rows=40 width=130) predicate:_col3 is not null - Group By Operator [GBY_1392] (rows=121728 width=131) + Group By Operator [GBY_1371] (rows=40 width=130) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_382] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_379] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_381] (rows=243456 width=131) + Group By Operator [GBY_378] (rows=40 width=130) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_379] (rows=3942084 width=126) + Select Operator [SEL_376] (rows=6181 width=9) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1189] (rows=3942084 width=126) - Conds:RS_376._col1=RS_1346._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1346] + Merge Join Operator [MERGEJOIN_1165] (rows=6181 width=9) + Conds:RS_373._col1=RS_1319._col0(Inner),Output:["_col2","_col3","_col13","_col14","_col15"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1319] PartitionCols:_col0 - Select Operator [SEL_1337] (rows=462000 width=15) + Select Operator [SEL_1310] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Please refer to the previous TableScan [TS_81] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_376] + Please refer to the previous TableScan [TS_6] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_373] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1188] (rows=3942084 width=118) - Conds:RS_373._col1=RS_374._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_373] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1180] (rows=3942084 width=118) - Conds:RS_1387._col0=RS_1313._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1313] + Merge Join Operator [MERGEJOIN_1164] (rows=6181 width=4) + Conds:RS_370._col6, _col7, _col8=RS_1370._col0, _col1, _col2(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_370] + PartitionCols:_col6, _col7, _col8 + Merge Join Operator [MERGEJOIN_1157] (rows=3913176 width=130) + Conds:RS_367._col1=RS_1328._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1328] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1308] - <-Map 80 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1387] - PartitionCols:_col0 - Select Operator [SEL_1386] (rows=143966864 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1385] (rows=143966864 width=123) - predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_371_date_dim_d_date_sk_min) AND DynamicValue(RS_371_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_371_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_287] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1384] - Group By Operator [GBY_1383] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1320] - Group By Operator [GBY_1317] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1314] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1308] - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_374] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1187] (rows=724 width=4) - Conds:RS_1353._col1, _col2, _col3=RS_1391._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1353] - PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1347] (rows=458612 width=15) + Select Operator [SEL_1318] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1338] (rows=458612 width=15) + Filter Operator [FIL_1309] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1391] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1390] (rows=1 width=12) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1389] (rows=1 width=20) - predicate:(_col3 = 3L) - Group By Operator [GBY_1388] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 36 [SIMPLE_EDGE] - <-Reducer 35 [CONTAINS] vectorized - Reduce Output Operator [RS_1432] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1431] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1430] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_313] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_25] - <-Reducer 45 [CONTAINS] vectorized - Reduce Output Operator [RS_1446] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1445] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1444] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_333] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_45] - <-Reducer 51 [CONTAINS] vectorized - Reduce Output Operator [RS_1460] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1459] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1458] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_354] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_66] - <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1398] - Select Operator [SEL_1397] (rows=1 width=112) + Please refer to the previous TableScan [TS_6] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_367] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1156] (rows=3942084 width=118) + Conds:RS_1366._col0=RS_1289._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1289] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1284] + <-Map 83 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1366] + PartitionCols:_col0 + Select Operator [SEL_1365] (rows=143966864 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1364] (rows=143966864 width=123) + predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_365_date_dim_d_date_sk_min) AND DynamicValue(RS_365_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_365_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_285] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] + <-Reducer 26 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1363] + Group By Operator [GBY_1362] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1296] + Group By Operator [GBY_1293] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1290] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1284] + <-Reducer 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1370] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_1369] (rows=1 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1368] (rows=1 width=20) + predicate:(_col3 = 3L) + Group By Operator [GBY_1367] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 41 [SIMPLE_EDGE] + <-Reducer 40 [CONTAINS] vectorized + Reduce Output Operator [RS_1427] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1426] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1425] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_311] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_167] + <-Reducer 45 [CONTAINS] vectorized + Reduce Output Operator [RS_1433] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1432] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1431] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 43 [SIMPLE_EDGE] + SHUFFLE [RS_331] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_187] + <-Reducer 48 [CONTAINS] vectorized + Reduce Output Operator [RS_1439] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1438] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1437] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 46 [SIMPLE_EDGE] + SHUFFLE [RS_352] + PartitionCols:_col0, _col1, _col2 + Please refer to the previous Group By Operator [GBY_208] + <-Reducer 66 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1377] + Select Operator [SEL_1376] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1396] (rows=1 width=120) + Filter Operator [FIL_1375] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1395] (rows=1 width=120) + Group By Operator [GBY_1374] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 61 [CUSTOM_SIMPLE_EDGE] - <-Reducer 60 [CONTAINS] - Reduce Output Operator [RS_1270] - Group By Operator [GBY_1269] (rows=1 width=120) + <-Union 65 [CUSTOM_SIMPLE_EDGE] + <-Reducer 64 [CONTAINS] + Reduce Output Operator [RS_1246] + Group By Operator [GBY_1245] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1268] (rows=26270325 width=44) + Select Operator [SEL_1244] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1266] (rows=14736682 width=0) + Select Operator [SEL_1242] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1265] (rows=14736682 width=0) - Conds:RS_1467._col0=RS_1410._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1410] + Merge Join Operator [MERGEJOIN_1241] (rows=14736682 width=0) + Conds:RS_1446._col0=RS_1389._col0(Inner),Output:["_col1","_col2"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1389] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1467] + Please refer to the previous Select Operator [SEL_1379] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1446] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] - <-Reducer 70 [CONTAINS] - Reduce Output Operator [RS_1288] - Group By Operator [GBY_1287] (rows=1 width=120) + Please refer to the previous Select Operator [SEL_1443] + <-Reducer 73 [CONTAINS] + Reduce Output Operator [RS_1264] + Group By Operator [GBY_1263] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1286] (rows=26270325 width=44) + Select Operator [SEL_1262] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1284] (rows=7676736 width=94) + Select Operator [SEL_1260] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1283] (rows=7676736 width=94) - Conds:RS_1482._col0=RS_1473._col0(Inner),Output:["_col1","_col2"] - <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1473] + Merge Join Operator [MERGEJOIN_1259] (rows=7676736 width=94) + Conds:RS_1461._col0=RS_1452._col0(Inner),Output:["_col1","_col2"] + <-Map 74 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1452] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1469] - <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1482] + Please refer to the previous Select Operator [SEL_1448] + <-Map 70 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1461] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] - <-Reducer 76 [CONTAINS] - Reduce Output Operator [RS_1306] - Group By Operator [GBY_1305] (rows=1 width=120) + Please refer to the previous Select Operator [SEL_1458] + <-Reducer 79 [CONTAINS] + Reduce Output Operator [RS_1282] + Group By Operator [GBY_1281] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1304] (rows=26270325 width=44) + Select Operator [SEL_1280] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1302] (rows=3856907 width=114) + Select Operator [SEL_1278] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1301] (rows=3856907 width=114) - Conds:RS_1497._col0=RS_1488._col0(Inner),Output:["_col1","_col2"] - <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1488] + Merge Join Operator [MERGEJOIN_1277] (rows=3856907 width=114) + Conds:RS_1476._col0=RS_1467._col0(Inner),Output:["_col1","_col2"] + <-Map 80 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1467] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1484] - <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1497] + Please refer to the previous Select Operator [SEL_1463] + <-Map 76 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1476] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1494] - <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_1202] + Please refer to the previous Select Operator [SEL_1473] + <-Reducer 7 [CONTAINS] + Reduce Output Operator [RS_1178] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1201] (rows=304320 width=230) + Group By Operator [GBY_1177] (rows=205 width=228) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1200] (rows=121728 width=220) + Top N Key Operator [TNK_1176] (rows=82 width=220) keys:_col0, _col1, _col2, _col3,top n:100 - Select Operator [SEL_1198] (rows=40576 width=220) + Select Operator [SEL_1174] (rows=45 width=220) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1197] (rows=40576 width=243) + Filter Operator [FIL_1173] (rows=45 width=243) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1196] (rows=121728 width=243) + Merge Join Operator [MERGEJOIN_1172] (rows=136 width=243) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1356] - Filter Operator [FIL_1355] (rows=121728 width=131) - predicate:_col3 is not null - Group By Operator [GBY_1354] (rows=121728 width=131) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_94] (rows=121728 width=131) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 - Select Operator [SEL_92] (rows=15062131 width=11) - Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1163] (rows=15062131 width=11) - Conds:RS_89._col1=RS_1339._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1339] - PartitionCols:_col0 - Select Operator [SEL_1330] (rows=462000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Please refer to the previous TableScan [TS_81] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1162] (rows=15062131 width=4) - Conds:RS_86._col1=RS_87._col0(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_86] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1154] (rows=15062131 width=4) - Conds:RS_1325._col0=RS_1309._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1309] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1308] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1325] - PartitionCols:_col0 - Select Operator [SEL_1324] (rows=550076554 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1323] (rows=550076554 width=118) - predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_84_date_dim_d_date_sk_min) AND DynamicValue(RS_84_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_84_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1322] - Group By Operator [GBY_1321] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1318] - Group By Operator [GBY_1315] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1310] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1308] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_87] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1161] (rows=724 width=4) - Conds:RS_1348._col1, _col2, _col3=RS_1329._col0, _col1, _col2(Inner),Output:["_col0"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1348] - PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1340] (rows=458612 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1331] (rows=458612 width=15) - predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) - Please refer to the previous TableScan [TS_81] - <-Reducer 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1329] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1328] (rows=1 width=12) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1327] (rows=1 width=20) - predicate:(_col3 = 3L) - Group By Operator [GBY_1326] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Union 28 [SIMPLE_EDGE] - <-Reducer 27 [CONTAINS] vectorized - Reduce Output Operator [RS_1426] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1425] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1424] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_25] - <-Reducer 43 [CONTAINS] vectorized - Reduce Output Operator [RS_1440] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1439] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1438] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 42 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_45] - <-Reducer 49 [CONTAINS] vectorized - Reduce Output Operator [RS_1454] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1453] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1452] (rows=121728 width=19) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 48 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col0, _col1, _col2 - Please refer to the previous Group By Operator [GBY_66] - <-Reducer 55 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1360] - Select Operator [SEL_1359] (rows=1 width=112) + <-Reducer 59 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1339] + Select Operator [SEL_1338] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1358] (rows=1 width=120) + Filter Operator [FIL_1337] (rows=1 width=120) predicate:CAST( (_col0 / _col1) AS decimal(22,6)) is not null - Group By Operator [GBY_1357] (rows=1 width=120) + Group By Operator [GBY_1336] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] - <-Union 54 [CUSTOM_SIMPLE_EDGE] - <-Reducer 53 [CONTAINS] - Reduce Output Operator [RS_1258] - Group By Operator [GBY_1257] (rows=1 width=120) + <-Union 58 [CUSTOM_SIMPLE_EDGE] + <-Reducer 57 [CONTAINS] + Reduce Output Operator [RS_1234] + Group By Operator [GBY_1233] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1256] (rows=26270325 width=44) + Select Operator [SEL_1232] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1254] (rows=14736682 width=0) + Select Operator [SEL_1230] (rows=14736682 width=0) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1253] (rows=14736682 width=0) - Conds:RS_1465._col0=RS_1407._col0(Inner),Output:["_col1","_col2"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1407] + Merge Join Operator [MERGEJOIN_1229] (rows=14736682 width=0) + Conds:RS_1444._col0=RS_1386._col0(Inner),Output:["_col1","_col2"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1386] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1400] - <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1465] + Please refer to the previous Select Operator [SEL_1379] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1444] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] - <-Reducer 68 [CONTAINS] - Reduce Output Operator [RS_1276] - Group By Operator [GBY_1275] (rows=1 width=120) + Please refer to the previous Select Operator [SEL_1443] + <-Reducer 71 [CONTAINS] + Reduce Output Operator [RS_1252] + Group By Operator [GBY_1251] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1274] (rows=26270325 width=44) + Select Operator [SEL_1250] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1272] (rows=7676736 width=94) + Select Operator [SEL_1248] (rows=7676736 width=94) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1271] (rows=7676736 width=94) - Conds:RS_1480._col0=RS_1470._col0(Inner),Output:["_col1","_col2"] - <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1470] + Merge Join Operator [MERGEJOIN_1247] (rows=7676736 width=94) + Conds:RS_1459._col0=RS_1449._col0(Inner),Output:["_col1","_col2"] + <-Map 74 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1449] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1469] - <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1480] + Please refer to the previous Select Operator [SEL_1448] + <-Map 70 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1459] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] - <-Reducer 74 [CONTAINS] - Reduce Output Operator [RS_1294] - Group By Operator [GBY_1293] (rows=1 width=120) + Please refer to the previous Select Operator [SEL_1458] + <-Reducer 77 [CONTAINS] + Reduce Output Operator [RS_1270] + Group By Operator [GBY_1269] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1292] (rows=26270325 width=44) + Select Operator [SEL_1268] (rows=26270325 width=44) Output:["_col0"] - Select Operator [SEL_1290] (rows=3856907 width=114) + Select Operator [SEL_1266] (rows=3856907 width=114) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1289] (rows=3856907 width=114) - Conds:RS_1495._col0=RS_1485._col0(Inner),Output:["_col1","_col2"] - <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1485] + Merge Join Operator [MERGEJOIN_1265] (rows=3856907 width=114) + Conds:RS_1474._col0=RS_1464._col0(Inner),Output:["_col1","_col2"] + <-Map 80 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1464] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1484] - <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1495] + Please refer to the previous Select Operator [SEL_1463] + <-Map 76 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1474] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1494] + Please refer to the previous Select Operator [SEL_1473] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1335] + Filter Operator [FIL_1334] (rows=136 width=131) + predicate:_col3 is not null + Group By Operator [GBY_1333] (rows=136 width=131) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_93] (rows=136 width=131) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 + Select Operator [SEL_91] (rows=23620 width=11) + Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1139] (rows=23620 width=11) + Conds:RS_88._col1=RS_1312._col0(Inner),Output:["_col2","_col3","_col13","_col14","_col15"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1312] + PartitionCols:_col0 + Select Operator [SEL_1303] (rows=462000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Please refer to the previous TableScan [TS_6] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_88] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1138] (rows=23620 width=4) + Conds:RS_85._col6, _col7, _col8=RS_1332._col0, _col1, _col2(Inner),Output:["_col1","_col2","_col3"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col6, _col7, _col8 + Merge Join Operator [MERGEJOIN_1131] (rows=14951676 width=15) + Conds:RS_82._col1=RS_1320._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1320] + PartitionCols:_col0 + Select Operator [SEL_1311] (rows=458612 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1302] (rows=458612 width=15) + predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1130] (rows=15062131 width=4) + Conds:RS_1301._col0=RS_1285._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 11 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1285] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1284] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1301] + PartitionCols:_col0 + Select Operator [SEL_1300] (rows=550076554 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_1299] (rows=550076554 width=118) + predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_80_date_dim_d_date_sk_min) AND DynamicValue(RS_80_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_80_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1298] + Group By Operator [GBY_1297] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1294] + Group By Operator [GBY_1291] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1286] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1284] + <-Reducer 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1332] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_1331] (rows=1 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1330] (rows=1 width=20) + predicate:(_col3 = 3L) + Group By Operator [GBY_1329] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Union 30 [SIMPLE_EDGE] + <-Reducer 29 [CONTAINS] vectorized + Reduce Output Operator [RS_1405] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1404] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1403] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_25] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1133] (rows=14628613 width=11) + Conds:RS_21._col1=RS_1321._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1321] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1311] + <-Reducer 50 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_1132] + <-Reducer 33 [CONTAINS] vectorized + Reduce Output Operator [RS_1413] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1412] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1411] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_45] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1135] (rows=7620440 width=11) + Conds:RS_41._col1=RS_1322._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1322] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1311] + <-Reducer 53 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_1134] + <-Reducer 35 [CONTAINS] vectorized + Reduce Output Operator [RS_1421] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_1420] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 + Group By Operator [GBY_1419] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 34 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_66] (rows=121728 width=19) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 + Merge Join Operator [MERGEJOIN_1137] (rows=3828623 width=11) + Conds:RS_62._col1=RS_1323._col0(Inner),Output:["_col4","_col5","_col6"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1323] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1311] + <-Reducer 55 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_1136] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out index 66777769e6..9e5c1582c4 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query2.q.out @@ -149,10 +149,10 @@ Stage-0 Select Operator [SEL_56] (rows=12881 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_146] (rows=12881 width=1572) - Conds:RS_53._col0=RS_54.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + Conds:RS_53.(_col0 - 53)=RS_54._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_53] - PartitionCols:_col0 + PartitionCols:(_col0 - 53) Merge Join Operator [MERGEJOIN_143] (rows=652 width=788) Conds:RS_164._col0=RS_170._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized @@ -161,7 +161,7 @@ Stage-0 Select Operator [SEL_168] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_166] (rows=652 width=8) - predicate:((d_year = 2001) and d_week_seq is not null) + predicate:((d_year = 2002) and d_week_seq is not null) TableScan [TS_20] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_week_seq","d_year"] <-Reducer 4 [SIMPLE_EDGE] vectorized @@ -208,7 +208,7 @@ Stage-0 Output:["cs_sold_date_sk","cs_ext_sales_price"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_54] - PartitionCols:(_col0 - 53) + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_145] (rows=652 width=788) Conds:RS_165._col0=RS_171._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized @@ -217,7 +217,7 @@ Stage-0 Select Operator [SEL_169] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_167] (rows=652 width=8) - predicate:((d_year = 2002) and d_week_seq is not null) + predicate:((d_year = 2001) and d_week_seq is not null) Please refer to the previous TableScan [TS_20] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out index f7c7260077..c147b9cd65 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query59.q.out @@ -95,142 +95,140 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 10 <- Map 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 14 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized + Reducer 5 vectorized File Output Operator [FS_208] Limit [LIM_207] (rows=100 width=976) Number of rows:100 - Select Operator [SEL_206] (rows=1012347 width=976) + Select Operator [SEL_206] (rows=552189 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_57] - Select Operator [SEL_56] (rows=1012347 width=976) + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_58] + Select Operator [SEL_57] (rows=552189 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_95] (rows=1012347 width=1648) - keys:_col12, _col11, _col0,top n:100 - Merge Join Operator [MERGEJOIN_182] (rows=1012347 width=1648) - Conds:RS_53._col11, _col0=RS_54._col1, (_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_54] + Top N Key Operator [TNK_95] (rows=552189 width=1648) + keys:_col2, _col1, _col5,top n:100 + Merge Join Operator [MERGEJOIN_182] (rows=552189 width=1648) + Conds:RS_54._col3, _col5=RS_55._col1, (_col0 - 52)(Inner),Output:["_col1","_col2","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col17","_col18","_col19","_col20","_col21","_col22"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col1, (_col0 - 52) - Select Operator [SEL_46] (rows=28847 width=776) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_181] (rows=28847 width=776) - Conds:RS_43._col1=RS_205._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_205] - PartitionCols:_col0 - Select Operator [SEL_204] (rows=1704 width=104) - Output:["_col0","_col1"] - TableScan [TS_38] (rows=1704 width=104) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=28847 width=676) - Conds:RS_203._col0=RS_199._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] - PartitionCols:_col0 - Select Operator [SEL_197] (rows=317 width=4) - Output:["_col0"] - Filter Operator [FIL_195] (rows=317 width=8) - predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) - TableScan [TS_15] (rows=73049 width=8) - default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] - PartitionCols:_col0 - Group By Operator [GBY_202] (rows=1196832 width=679) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=525329897 width=679) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_29] (rows=525329897 width=138) - Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_179] (rows=525329897 width=138) - Conds:RS_186._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] - PartitionCols:_col0 - Select Operator [SEL_184] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_183] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] - PartitionCols:_col0 - Select Operator [SEL_189] (rows=73049 width=36) - Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_187] (rows=73049 width=99) - predicate:d_week_seq is not null - TableScan [TS_3] (rows=73049 width=99) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col11, _col0 - Merge Join Operator [MERGEJOIN_178] (rows=28847 width=976) - Conds:RS_50._col1=RS_201._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] + Merge Join Operator [MERGEJOIN_180] (rows=28847 width=676) + Conds:RS_205._col0=RS_203._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] PartitionCols:_col0 - Select Operator [SEL_200] (rows=1704 width=192) - Output:["_col0","_col1","_col2"] - TableScan [TS_18] (rows=1704 width=192) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_50] + Select Operator [SEL_201] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_199] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) + TableScan [TS_19] (rows=73049 width=8) + default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_205] + PartitionCols:_col0 + Group By Operator [GBY_204] (rows=1196832 width=679) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col0, _col1 + Group By Operator [GBY_37] (rows=525329897 width=679) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 + Select Operator [SEL_35] (rows=525329897 width=138) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_179] (rows=525329897 width=138) + Conds:RS_190._col0=RS_195._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_191] (rows=73049 width=99) + predicate:d_week_seq is not null + TableScan [TS_7] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_188] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_187] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_4] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_54] + PartitionCols:_col3, _col5 + Merge Join Operator [MERGEJOIN_181] (rows=28847 width=980) + Conds:RS_51._col0=RS_52._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=28847 width=788) - Conds:RS_193._col0=RS_198._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Merge Join Operator [MERGEJOIN_178] (rows=28847 width=788) + Conds:RS_197._col0=RS_202._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_202] PartitionCols:_col0 - Select Operator [SEL_196] (rows=317 width=4) + Select Operator [SEL_200] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_194] (rows=317 width=8) + Filter Operator [FIL_198] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + Please refer to the previous TableScan [TS_19] + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_197] PartitionCols:_col0 - Group By Operator [GBY_192] (rows=1196832 width=791) + Group By Operator [GBY_196] (rows=1196832 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=525329897 width=791) + Group By Operator [GBY_15] (rows=525329897 width=791) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_9] (rows=525329897 width=142) + Select Operator [SEL_13] (rows=525329897 width=142) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_176] (rows=525329897 width=142) - Conds:RS_185._col0=RS_190._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_184] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] + Merge Join Operator [MERGEJOIN_177] (rows=525329897 width=142) + Conds:RS_189._col0=RS_194._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_188] (rows=73049 width=36) + Select Operator [SEL_192] (rows=73049 width=36) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Please refer to the previous Filter Operator [FIL_187] + Please refer to the previous Filter Operator [FIL_191] + <-Map 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_189] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_188] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_176] (rows=3532 width=196) + Conds:RS_184._col1=RS_186._col1(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_184] + PartitionCols:_col1 + Select Operator [SEL_183] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_186] + PartitionCols:_col1 + Select Operator [SEL_185] (rows=1704 width=104) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out index 39d35ec330..63da151c3a 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out @@ -75,121 +75,156 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 15 <- Reducer 10 (BROADCAST_EDGE) -Map 18 <- Reducer 10 (BROADCAST_EDGE) -Map 19 <- Reducer 9 (BROADCAST_EDGE) -Map 23 <- Reducer 9 (BROADCAST_EDGE) -Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 21 <- Map 24 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 22 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Map 14 <- Reducer 9 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_276] - Group By Operator [GBY_275] (rows=1 width=232) + File Output Operator [FS_251] + Group By Operator [GBY_250] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_274] - Group By Operator [GBY_273] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_249] + Group By Operator [GBY_248] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_272] (rows=5022875 width=228) + Group By Operator [GBY_247] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_105] PartitionCols:_col0 Group By Operator [GBY_104] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_231] (rows=5022875 width=227) - Conds:RS_55._col3=RS_271._col0(Inner),Output:["_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_214] (rows=5022875 width=227) + Conds:RS_55._col3=RS_246._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0 + Group By Operator [GBY_245] (rows=8007986 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Group By Operator [GBY_39] (rows=14398467 width=4) + Output:["_col0"],keys:_col2 + Select Operator [SEL_38] (rows=1384229733 width=11) + Output:["_col2"] + Filter Operator [FIL_37] (rows=1384229733 width=11) + predicate:(_col0 <> _col3) + Merge Join Operator [MERGEJOIN_212] (rows=1384229733 width=11) + Conds:RS_34._col1=RS_238._col1(Inner),Output:["_col0","_col2","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_238] + PartitionCols:_col1 + Select Operator [SEL_237] (rows=144002668 width=7) + Output:["_col0","_col1"] + TableScan [TS_29] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_211] (rows=141176436 width=11) + Conds:RS_239._col1=RS_244._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_239] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_237] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col0 + Select Operator [SEL_243] (rows=14398467 width=4) + Output:["_col0"] + TableScan [TS_27] (rows=14398467 width=4) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] <-Reducer 5 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_55] + SHUFFLE [RS_55] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_230] (rows=5022875 width=227) - Conds:RS_52._col3=RS_259._col0(Inner),Output:["_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_213] (rows=5022875 width=227) + Conds:RS_52._col3=RS_242._col0(Inner),Output:["_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] + PARTITION_ONLY_SHUFFLE [RS_52] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_228] (rows=5022875 width=227) - Conds:RS_49._col2=RS_248._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + Merge Join Operator [MERGEJOIN_209] (rows=5022875 width=227) + Conds:RS_49._col2=RS_231._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] PartitionCols:_col0 - Select Operator [SEL_247] (rows=12 width=4) + Select Operator [SEL_230] (rows=12 width=4) Output:["_col0"] - Filter Operator [FIL_246] (rows=12 width=92) + Filter Operator [FIL_229] (rows=12 width=92) predicate:(web_company_name = 'pri') TableScan [TS_9] (rows=84 width=92) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_227] (rows=15673790 width=231) - Conds:RS_46._col1=RS_234._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + Merge Join Operator [MERGEJOIN_208] (rows=15673790 width=231) + Conds:RS_46._col1=RS_217._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_233] (rows=784314 width=4) + Select Operator [SEL_216] (rows=784314 width=4) Output:["_col0"] - Filter Operator [FIL_232] (rows=784314 width=90) + Filter Operator [FIL_215] (rows=784314 width=90) predicate:(ca_state = 'TX') TableScan [TS_6] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_226] (rows=15987241 width=235) - Conds:RS_242._col0=RS_245._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_207] (rows=15987241 width=235) + Conds:RS_225._col0=RS_228._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_241] (rows=143895019 width=239) + Select Operator [SEL_224] (rows=143895019 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_240] (rows=143895019 width=239) + Filter Operator [FIL_223] (rows=143895019 width=239) predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_47_customer_address_ca_address_sk_min) AND DynamicValue(RS_47_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_47_customer_address_ca_address_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=239) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Group By Operator [GBY_238] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] - Group By Operator [GBY_236] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_235] (rows=784314 width=4) + Select Operator [SEL_218] (rows=784314 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_233] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] + Please refer to the previous Select Operator [SEL_216] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] PartitionCols:_col0 - Select Operator [SEL_244] (rows=8116 width=98) + Select Operator [SEL_227] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_243] (rows=8116 width=98) + Filter Operator [FIL_226] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] PartitionCols:_col0 - Group By Operator [GBY_258] (rows=14686712 width=4) + Group By Operator [GBY_241] (rows=14686712 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Group By Operator [GBY_21] (rows=144002668 width=4) @@ -198,96 +233,30 @@ Stage-0 Output:["_col1"] Filter Operator [FIL_19] (rows=1411940834 width=11) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_229] (rows=1411940834 width=11) - Conds:RS_254._col1=RS_257._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + Merge Join Operator [MERGEJOIN_210] (rows=1411940834 width=11) + Conds:RS_240._col1=RS_236._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_240] PartitionCols:_col1 - Select Operator [SEL_253] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_252] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_52_ws1_ws_order_number_min) AND DynamicValue(RS_52_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_52_ws1_ws_order_number_bloom_filter))) - TableScan [TS_12] (rows=144002668 width=7) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_250] - Group By Operator [GBY_249] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_183] - Group By Operator [GBY_182] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_181] (rows=5022875 width=8) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_228] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_257] + Please refer to the previous Select Operator [SEL_237] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] PartitionCols:_col1 - Select Operator [SEL_256] (rows=144002668 width=7) + Select Operator [SEL_235] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_255] (rows=144002668 width=7) + Filter Operator [FIL_234] (rows=144002668 width=7) predicate:(ws_order_number BETWEEN DynamicValue(RS_52_ws1_ws_order_number_min) AND DynamicValue(RS_52_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_52_ws1_ws_order_number_bloom_filter))) TableScan [TS_14] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_251] - Please refer to the previous Group By Operator [GBY_249] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_271] - PartitionCols:_col0 - Group By Operator [GBY_270] (rows=8007986 width=4) - Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col0 - Group By Operator [GBY_39] (rows=14398467 width=4) - Output:["_col0"],keys:_col14 - Merge Join Operator [MERGEJOIN_225] (rows=1384229738 width=4) - Conds:RS_35._col0=RS_269.wr_order_number(Inner),Output:["_col14"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_269] - PartitionCols:wr_order_number - TableScan [TS_34] (rows=14398467 width=4) - default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Select Operator [SEL_33] (rows=1411940834 width=4) - Output:["_col0"] - Filter Operator [FIL_32] (rows=1411940834 width=11) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_224] (rows=1411940834 width=11) - Conds:RS_265._col1=RS_268._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] - PartitionCols:_col1 - Select Operator [SEL_264] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_263] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter))) - TableScan [TS_25] (rows=144002668 width=7) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_202] - Group By Operator [GBY_201] (rows=1 width=12) + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_163] + Group By Operator [GBY_162] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_200] (rows=5022875 width=8) + Select Operator [SEL_161] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_230] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_268] - PartitionCols:_col1 - Select Operator [SEL_267] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_266] (rows=144002668 width=7) - predicate:(ws_order_number BETWEEN DynamicValue(RS_55_ws1_ws_order_number_min) AND DynamicValue(RS_55_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_55_ws1_ws_order_number_bloom_filter))) - TableScan [TS_27] (rows=144002668 width=7) - default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_262] - Please refer to the previous Group By Operator [GBY_260] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] diff --git ql/src/test/results/clientpositive/perf/tez/query2.q.out ql/src/test/results/clientpositive/perf/tez/query2.q.out index 0e67e97c02..4dff1b2da6 100644 --- ql/src/test/results/clientpositive/perf/tez/query2.q.out +++ ql/src/test/results/clientpositive/perf/tez/query2.q.out @@ -149,10 +149,10 @@ Stage-0 Select Operator [SEL_56] (rows=12881 width=788) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_146] (rows=12881 width=1572) - Conds:RS_53._col0=RS_54.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] + Conds:RS_53.(_col0 - 53)=RS_54._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_53] - PartitionCols:_col0 + PartitionCols:(_col0 - 53) Merge Join Operator [MERGEJOIN_143] (rows=652 width=788) Conds:RS_164._col0=RS_170._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized @@ -161,7 +161,7 @@ Stage-0 Select Operator [SEL_168] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_166] (rows=652 width=8) - predicate:((d_year = 2001) and d_week_seq is not null) + predicate:((d_year = 2002) and d_week_seq is not null) TableScan [TS_20] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_week_seq","d_year"] <-Reducer 4 [SIMPLE_EDGE] vectorized @@ -208,7 +208,7 @@ Stage-0 Output:["cs_sold_date_sk","cs_ext_sales_price"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_54] - PartitionCols:(_col0 - 53) + PartitionCols:_col0 Merge Join Operator [MERGEJOIN_145] (rows=652 width=788) Conds:RS_165._col0=RS_171._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized @@ -217,7 +217,7 @@ Stage-0 Select Operator [SEL_169] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_167] (rows=652 width=8) - predicate:((d_year = 2002) and d_week_seq is not null) + predicate:((d_year = 2001) and d_week_seq is not null) Please refer to the previous TableScan [TS_20] <-Reducer 4 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] diff --git ql/src/test/results/clientpositive/perf/tez/query59.q.out ql/src/test/results/clientpositive/perf/tez/query59.q.out index 1a2ba964f4..c90e788742 100644 --- ql/src/test/results/clientpositive/perf/tez/query59.q.out +++ ql/src/test/results/clientpositive/perf/tez/query59.q.out @@ -95,146 +95,144 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 15 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_212] - Limit [LIM_211] (rows=100 width=976) + Reducer 8 vectorized + File Output Operator [FS_215] + Limit [LIM_214] (rows=100 width=976) Number of rows:100 - Select Operator [SEL_210] (rows=1012347 width=976) + Select Operator [SEL_213] (rows=117616339 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - <-Reducer 6 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_59] - Select Operator [SEL_58] (rows=1012347 width=976) + Select Operator [SEL_58] (rows=117616339 width=976) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Top N Key Operator [TNK_97] (rows=1012347 width=1648) + Top N Key Operator [TNK_100] (rows=117616339 width=1648) keys:_col12, _col11, _col0,top n:100 - Merge Join Operator [MERGEJOIN_184] (rows=1012347 width=1648) - Conds:RS_55._col11, _col0=RS_56._col1, (_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col1, (_col0 - 52) - Select Operator [SEL_48] (rows=28847 width=776) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_183] (rows=28847 width=776) - Conds:RS_45._col1=RS_209._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col10"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=1704 width=104) - Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=1704 width=104) - predicate:(s_store_sk is not null and s_store_id is not null) - TableScan [TS_39] (rows=1704 width=104) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_182] (rows=28847 width=676) - Conds:RS_206._col0=RS_201._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - PartitionCols:_col0 - Select Operator [SEL_199] (rows=317 width=4) - Output:["_col0"] - Filter Operator [FIL_197] (rows=317 width=8) - predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) - TableScan [TS_15] (rows=73049 width=8) - default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] - PartitionCols:_col0 - Group By Operator [GBY_205] (rows=1196832 width=679) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col0, _col1 - Group By Operator [GBY_32] (rows=525329897 width=679) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_30] (rows=525329897 width=138) - Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_181] (rows=525329897 width=138) - Conds:RS_188._col0=RS_193._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] - PartitionCols:_col0 - Select Operator [SEL_186] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_185] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] - PartitionCols:_col0 - Select Operator [SEL_191] (rows=73049 width=36) - Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_189] (rows=73049 width=99) - predicate:(d_date_sk is not null and d_week_seq is not null) - TableScan [TS_3] (rows=73049 width=99) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] - <-Reducer 5 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_187] (rows=117616339 width=1648) + Conds:RS_55._col11, _col14=RS_212._col1, _col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col20"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + PartitionCols:_col1, _col0 + Select Operator [SEL_211] (rows=1704 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_210] (rows=1704 width=104) + predicate:(s_store_sk is not null and s_store_id is not null) + TableScan [TS_43] (rows=1704 width=104) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] + <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_55] - PartitionCols:_col11, _col0 - Merge Join Operator [MERGEJOIN_180] (rows=28847 width=976) - Conds:RS_52._col1=RS_204._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] - PartitionCols:_col0 - Select Operator [SEL_203] (rows=1704 width=192) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_202] (rows=1704 width=192) - predicate:(s_store_sk is not null and s_store_id is not null) - TableScan [TS_18] (rows=1704 width=192) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=28847 width=788) - Conds:RS_195._col0=RS_200._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + PartitionCols:_col11, _col14 + Merge Join Operator [MERGEJOIN_186] (rows=104018676 width=1651) + Conds:RS_52._col0=RS_53.(_col0 - 52)(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:(_col0 - 52) + Merge Join Operator [MERGEJOIN_185] (rows=28847 width=676) + Conds:RS_209._col0=RS_204._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + SHUFFLE [RS_204] PartitionCols:_col0 - Select Operator [SEL_198] (rows=317 width=4) + Select Operator [SEL_202] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_196] (rows=317 width=8) - predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) - Please refer to the previous TableScan [TS_15] - <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + Filter Operator [FIL_200] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) + TableScan [TS_15] (rows=73049 width=8) + default@date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_week_seq"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_209] PartitionCols:_col0 - Group By Operator [GBY_194] (rows=1196832 width=791) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] + Group By Operator [GBY_208] (rows=1196832 width=679) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_33] PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=525329897 width=791) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Select Operator [SEL_9] (rows=525329897 width=142) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_178] (rows=525329897 width=142) - Conds:RS_187._col0=RS_192._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Group By Operator [GBY_32] (rows=525329897 width=679) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 + Select Operator [SEL_30] (rows=525329897 width=138) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_184] (rows=525329897 width=138) + Conds:RS_191._col0=RS_196._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col8","_col9","_col10","_col11"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + SHUFFLE [RS_191] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_186] + Select Operator [SEL_189] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_188] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_192] + SHUFFLE [RS_196] PartitionCols:_col0 - Select Operator [SEL_190] (rows=73049 width=36) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Please refer to the previous Filter Operator [FIL_189] + Select Operator [SEL_194] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_192] (rows=73049 width=99) + predicate:(d_date_sk is not null and d_week_seq is not null) + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq","d_day_name"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_183] (rows=28847 width=976) + Conds:RS_49._col1=RS_207._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11","_col12"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_207] + PartitionCols:_col0 + Select Operator [SEL_206] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_205] (rows=1704 width=192) + predicate:(s_store_sk is not null and s_store_id is not null) + TableScan [TS_18] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_49] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_182] (rows=28847 width=788) + Conds:RS_198._col0=RS_203._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_203] + PartitionCols:_col0 + Select Operator [SEL_201] (rows=317 width=4) + Output:["_col0"] + Filter Operator [FIL_199] (rows=317 width=8) + predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) + Please refer to the previous TableScan [TS_15] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] + PartitionCols:_col0 + Group By Operator [GBY_197] (rows=1196832 width=791) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=525329897 width=791) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 + Select Operator [SEL_9] (rows=525329897 width=142) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_181] (rows=525329897 width=142) + Conds:RS_190._col0=RS_195._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_190] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_189] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_195] + PartitionCols:_col0 + Select Operator [SEL_193] (rows=73049 width=36) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Please refer to the previous Filter Operator [FIL_192] diff --git ql/src/test/results/clientpositive/perf/tez/query95.q.out ql/src/test/results/clientpositive/perf/tez/query95.q.out index f15afbed4b..327f3cafb6 100644 --- ql/src/test/results/clientpositive/perf/tez/query95.q.out +++ ql/src/test/results/clientpositive/perf/tez/query95.q.out @@ -75,121 +75,160 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 13 (BROADCAST_EDGE) -Map 15 <- Reducer 10 (BROADCAST_EDGE) -Map 18 <- Reducer 10 (BROADCAST_EDGE) -Map 19 <- Reducer 9 (BROADCAST_EDGE) -Map 23 <- Reducer 9 (BROADCAST_EDGE) -Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 21 <- Map 24 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 22 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE) +Map 14 <- Reducer 9 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_284] - Group By Operator [GBY_283] (rows=1 width=232) + File Output Operator [FS_258] + Group By Operator [GBY_257] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_282] - Group By Operator [GBY_281] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_256] + Group By Operator [GBY_255] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_280] (rows=5022875 width=228) + Group By Operator [GBY_254] (rows=5022875 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_111] + SHUFFLE [RS_110] PartitionCols:_col0 - Group By Operator [GBY_110] (rows=5022875 width=228) + Group By Operator [GBY_109] (rows=5022875 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_237] (rows=5022875 width=227) - Conds:RS_61._col3=RS_279._col0(Inner),Output:["_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_219] (rows=5022875 width=227) + Conds:RS_60._col3=RS_253._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] + PartitionCols:_col0 + Group By Operator [GBY_252] (rows=8007986 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Group By Operator [GBY_44] (rows=14398467 width=4) + Output:["_col0"],keys:_col2 + Select Operator [SEL_43] (rows=1384229733 width=11) + Output:["_col2"] + Filter Operator [FIL_42] (rows=1384229733 width=11) + predicate:(_col0 <> _col3) + Merge Join Operator [MERGEJOIN_217] (rows=1384229733 width=11) + Conds:RS_39._col1=RS_244._col1(Inner),Output:["_col0","_col2","_col3"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col1 + Select Operator [SEL_243] (rows=144002668 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_242] (rows=144002668 width=7) + predicate:ws_order_number is not null + TableScan [TS_33] (rows=144002668 width=7) + default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_216] (rows=141176436 width=11) + Conds:RS_245._col1=RS_251._col0(Inner),Output:["_col0","_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_243] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] + PartitionCols:_col0 + Select Operator [SEL_250] (rows=14398467 width=4) + Output:["_col0"] + Filter Operator [FIL_249] (rows=14398467 width=4) + predicate:wr_order_number is not null + TableScan [TS_30] (rows=14398467 width=4) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] <-Reducer 5 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_61] + SHUFFLE [RS_60] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_236] (rows=5022875 width=227) - Conds:RS_58._col3=RS_265._col0(Inner),Output:["_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_218] (rows=5022875 width=227) + Conds:RS_57._col3=RS_248._col0(Inner),Output:["_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_58] + PARTITION_ONLY_SHUFFLE [RS_57] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_232] (rows=5022875 width=227) - Conds:RS_55._col2=RS_254._col0(Inner),Output:["_col3","_col4","_col5"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + Merge Join Operator [MERGEJOIN_214] (rows=5022875 width=227) + Conds:RS_54._col2=RS_236._col0(Inner),Output:["_col3","_col4","_col5"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] PartitionCols:_col0 - Select Operator [SEL_253] (rows=12 width=4) + Select Operator [SEL_235] (rows=12 width=4) Output:["_col0"] - Filter Operator [FIL_252] (rows=12 width=92) + Filter Operator [FIL_234] (rows=12 width=92) predicate:((web_company_name = 'pri') and web_site_sk is not null) TableScan [TS_9] (rows=84 width=92) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_company_name"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_55] + SHUFFLE [RS_54] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_231] (rows=15673790 width=231) - Conds:RS_52._col1=RS_240._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_240] + Merge Join Operator [MERGEJOIN_213] (rows=15673790 width=231) + Conds:RS_51._col1=RS_222._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_239] (rows=784314 width=4) + Select Operator [SEL_221] (rows=784314 width=4) Output:["_col0"] - Filter Operator [FIL_238] (rows=784314 width=90) + Filter Operator [FIL_220] (rows=784314 width=90) predicate:((ca_state = 'TX') and ca_address_sk is not null) TableScan [TS_6] (rows=40000000 width=90) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_52] + SHUFFLE [RS_51] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_230] (rows=15987241 width=235) - Conds:RS_248._col0=RS_251._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_212] (rows=15987241 width=235) + Conds:RS_230._col0=RS_233._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_247] (rows=143895019 width=239) + Select Operator [SEL_229] (rows=143895019 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_246] (rows=143895019 width=239) - predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_order_number is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) + Filter Operator [FIL_228] (rows=143895019 width=239) + predicate:(ws_web_site_sk is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_order_number is not null and ws_ship_addr_sk BETWEEN DynamicValue(RS_52_customer_address_ca_address_sk_min) AND DynamicValue(RS_52_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_52_customer_address_ca_address_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=239) default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_244] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] - Group By Operator [GBY_242] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_241] (rows=784314 width=4) + Select Operator [SEL_223] (rows=784314 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_239] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] + Please refer to the previous Select Operator [SEL_221] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_233] PartitionCols:_col0 - Select Operator [SEL_250] (rows=8116 width=98) + Select Operator [SEL_232] (rows=8116 width=98) Output:["_col0"] - Filter Operator [FIL_249] (rows=8116 width=98) + Filter Operator [FIL_231] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] PartitionCols:_col0 - Group By Operator [GBY_264] (rows=14686712 width=4) + Group By Operator [GBY_247] (rows=14686712 width=4) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0 Group By Operator [GBY_23] (rows=144002668 width=4) @@ -198,100 +237,30 @@ Stage-0 Output:["_col1"] Filter Operator [FIL_21] (rows=1411940834 width=11) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_233] (rows=1411940834 width=11) - Conds:RS_260._col1=RS_263._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] + Merge Join Operator [MERGEJOIN_215] (rows=1411940834 width=11) + Conds:RS_246._col1=RS_241._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] PartitionCols:_col1 - Select Operator [SEL_259] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_258] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_58_ws1_ws_order_number_min) AND DynamicValue(RS_58_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_58_ws1_ws_order_number_bloom_filter))) - TableScan [TS_12] (rows=144002668 width=7) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_189] - Group By Operator [GBY_188] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_187] (rows=5022875 width=8) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_232] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_263] + Please refer to the previous Select Operator [SEL_243] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] PartitionCols:_col1 - Select Operator [SEL_262] (rows=144002668 width=7) + Select Operator [SEL_240] (rows=144002668 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_261] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_58_ws1_ws_order_number_min) AND DynamicValue(RS_58_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_58_ws1_ws_order_number_bloom_filter))) + Filter Operator [FIL_239] (rows=144002668 width=7) + predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_57_ws1_ws_order_number_min) AND DynamicValue(RS_57_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_57_ws1_ws_order_number_bloom_filter))) TableScan [TS_15] (rows=144002668 width=7) default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_257] - Please refer to the previous Group By Operator [GBY_255] - <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] - PartitionCols:_col0 - Group By Operator [GBY_278] (rows=8007986 width=4) - Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col0 - Group By Operator [GBY_45] (rows=14398467 width=4) - Output:["_col0"],keys:_col14 - Merge Join Operator [MERGEJOIN_235] (rows=1384229738 width=4) - Conds:RS_41._col0=RS_277._col13(Inner),Output:["_col14"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_277] - PartitionCols:_col13 - Select Operator [SEL_276] (rows=14398467 width=272) - Output:["_col13"] - Filter Operator [FIL_275] (rows=14398467 width=4) - predicate:wr_order_number is not null - TableScan [TS_38] (rows=14398467 width=4) - default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_order_number"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_37] (rows=1411940834 width=4) - Output:["_col0"] - Filter Operator [FIL_36] (rows=1411940834 width=11) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_234] (rows=1411940834 width=11) - Conds:RS_271._col1=RS_274._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_271] - PartitionCols:_col1 - Select Operator [SEL_270] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_269] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) - TableScan [TS_27] (rows=144002668 width=7) - default@web_sales,ws1,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_267] - Group By Operator [GBY_266] (rows=1 width=12) + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_208] - Group By Operator [GBY_207] (rows=1 width=12) + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_206] (rows=5022875 width=8) + Select Operator [SEL_166] (rows=5022875 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_236] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_274] - PartitionCols:_col1 - Select Operator [SEL_273] (rows=144002668 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_272] (rows=144002668 width=7) - predicate:(ws_order_number is not null and ws_order_number BETWEEN DynamicValue(RS_61_ws1_ws_order_number_min) AND DynamicValue(RS_61_ws1_ws_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_61_ws1_ws_order_number_bloom_filter))) - TableScan [TS_30] (rows=144002668 width=7) - default@web_sales,ws2,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_268] - Please refer to the previous Group By Operator [GBY_266] + Please refer to the previous Merge Join Operator [MERGEJOIN_214] diff --git ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index 9547e4fa7c..406f3d3f82 100644 --- ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -35,32 +35,32 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-18 is a root stage - Stage-13 depends on stages: Stage-18 - Stage-12 depends on stages: Stage-13 , consists of Stage-16, Stage-17, Stage-1 - Stage-16 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-16 - Stage-9 depends on stages: Stage-1, Stage-10, Stage-11 , consists of Stage-14, Stage-15, Stage-2 - Stage-14 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-14 - Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-15 has a backup stage: Stage-2 + Stage-19 is a root stage + Stage-14 depends on stages: Stage-19 + Stage-13 depends on stages: Stage-14 , consists of Stage-17, Stage-18, Stage-2 + Stage-17 has a backup stage: Stage-2 + Stage-11 depends on stages: Stage-17 + Stage-10 depends on stages: Stage-2, Stage-11, Stage-12 , consists of Stage-15, Stage-16, Stage-3 + Stage-15 has a backup stage: Stage-3 Stage-8 depends on stages: Stage-15 + Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 + Stage-16 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-16 + Stage-3 + Stage-18 has a backup stage: Stage-2 + Stage-12 depends on stages: Stage-18 Stage-2 - Stage-17 has a backup stage: Stage-1 - Stage-11 depends on stages: Stage-17 - Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-18 + Stage: Stage-19 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_3:t1_n94 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_3:t1_n94 TableScan alias: t1_n94 filterExpr: key is not null (type: boolean) @@ -77,7 +77,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -109,26 +109,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-13 Conditional Operator - Stage: Stage-16 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-10 - Map Reduce - Map Operator Tree: + $hdt$_1:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -140,28 +131,37 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src2 @@ -182,10 +182,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -193,7 +193,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -212,7 +212,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -236,7 +236,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -247,10 +247,10 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -268,7 +268,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -287,15 +287,15 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE TableScan alias: src2 @@ -319,7 +319,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -335,14 +335,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:src1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:src1 + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -354,36 +363,34 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -401,13 +408,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -415,7 +415,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col2 Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 8fb82e1659..790ed2ef41 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -46,9 +46,9 @@ POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' over POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3_n4 -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[14][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-4:MAPRED' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -65,9 +65,9 @@ POSTHOOK: Input: default@smb_bucket_3_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -87,7 +87,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: d filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -122,34 +122,6 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: TableScan alias: c filterExpr: (key = 5) (type: boolean) @@ -163,8 +135,31 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan - alias: d + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -183,13 +178,18 @@ STAGE PLANS: keys: 0 1 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -197,11 +197,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[39][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-10:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -217,29 +215,27 @@ POSTHOOK: Input: default@smb_bucket_2_n4 POSTHOOK: Input: default@smb_bucket_3_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-13 is a root stage - Stage-9 depends on stages: Stage-13 - Stage-8 depends on stages: Stage-9, Stage-10 , consists of Stage-11, Stage-12, Stage-2 - Stage-11 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-11 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-2 - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-13 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:b + $hdt$_0:a + Fetch Operator + limit: -1 + $hdt$_2:c + Fetch Operator + limit: -1 + $hdt$_3:b Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:b + $hdt$_0:a TableScan - alias: b + alias: a filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -251,12 +247,9 @@ STAGE PLANS: keys: 0 1 - - Stage: Stage-9 - Map Reduce - Map Operator Tree: + $hdt$_2:c TableScan - alias: a + alias: c filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -264,147 +257,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 1 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: + $hdt$_3:b TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-14 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:$hdt$_2:c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_2:c - TableScan - alias: c + alias: b filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -417,7 +276,7 @@ STAGE PLANS: 0 1 - Stage: Stage-10 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -436,12 +295,31 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -452,11 +330,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[39][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-10:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product PREHOOK: query: select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY PREHOOK: Input: default@smb_bucket_1_n4