diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index c55f8db61a..7825774244 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -605,6 +605,7 @@ minillaplocal.query.files=\ join_nullsafe.q,\ join_is_not_distinct_from.q,\ join_reordering_no_stats.q,\ + join_reorder5.q,\ kryo.q,\ leftsemijoin_mr.q,\ limit_join_transpose.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java index 492c55e050..19092c2cd3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinProjectTransposeRule.java @@ -29,20 +29,54 @@ public class HiveJoinProjectTransposeRule extends JoinProjectTransposeRule { public static final HiveJoinProjectTransposeRule LEFT_PROJECT_BTW_JOIN = - new HiveJoinProjectTransposeRule( + new HiveJoinLeftProjectBtwJoinTransposeRule(); + + private static final class HiveJoinLeftProjectBtwJoinTransposeRule extends HiveJoinProjectTransposeRule { + + public HiveJoinLeftProjectBtwJoinTransposeRule() { + super( operand(HiveJoin.class, - operand(HiveProject.class, operand(HiveJoin.class, any())), - operand(RelNode.class, any())), + operand(HiveProject.class, operand(HiveJoin.class, any())), + operand(RelNode.class, any())), "JoinProjectTransposeRule(Project-Join-Other)", false, HiveRelFactories.HIVE_BUILDER); + } + + @Override + protected boolean hasLeftChild(RelOptRuleCall call) { + return true; + } + + @Override + protected boolean hasRightChild(RelOptRuleCall call) { + return false; + } + } public static final HiveJoinProjectTransposeRule RIGHT_PROJECT_BTW_JOIN = - new HiveJoinProjectTransposeRule( + new HiveJoinRightProjectBtwJoinTransposeRule(); + + private static final class HiveJoinRightProjectBtwJoinTransposeRule extends HiveJoinProjectTransposeRule { + + public HiveJoinRightProjectBtwJoinTransposeRule() { + super( operand(HiveJoin.class, - operand(RelNode.class, any()), - operand(HiveProject.class, operand(HiveJoin.class, any()))), + operand(RelNode.class, any()), + operand(HiveProject.class, operand(HiveJoin.class, any()))), "JoinProjectTransposeRule(Other-Project-Join)", false, HiveRelFactories.HIVE_BUILDER); + } + + @Override + protected boolean hasLeftChild(RelOptRuleCall call) { + return false; + } + + @Override + protected boolean hasRightChild(RelOptRuleCall call) { + return true; + } + } public static final HiveJoinProjectTransposeRule BOTH_PROJECT = new HiveJoinProjectTransposeRule( diff --git ql/src/test/queries/clientpositive/join_reorder5.q ql/src/test/queries/clientpositive/join_reorder5.q new file mode 100644 index 0000000000..08269ea090 --- /dev/null +++ ql/src/test/queries/clientpositive/join_reorder5.q @@ -0,0 +1,29 @@ +-- Project shouldn't contain unnecessary fields in CBO plan. +-- Instead of +-- HiveProject(wr_order_number=[$0], wr_returned_time_sk=[$1], wr_return_quantity=[$2], BLOCK__OFFSET__INSIDE__FILE=[$3], INPUT__FILE__NAME=[$4], ROW__ID=[$5]) +-- Project should look like +-- HiveProject(wr_order_number=[$0]) + +create table web_sales ( + ws_order_number int +); + +create table web_returns ( + wr_order_number int, + wr_returned_time_sk timestamp, + wr_return_quantity int +); + +explain cbo +with ws_wh as + (select ws1.ws_order_number + from web_sales ws1,web_returns wr2 + where ws1.ws_order_number = wr2.wr_order_number) +select + ws_order_number +from + web_sales ws1 +where +ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number); diff --git ql/src/test/results/clientpositive/auto_join22.q.out ql/src/test/results/clientpositive/auto_join22.q.out index 5a98716fed..a8932695ad 100644 --- ql/src/test/results/clientpositive/auto_join22.q.out +++ ql/src/test/results/clientpositive/auto_join22.q.out @@ -17,14 +17,14 @@ STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:$hdt$_0:src1 + $hdt$_0:$hdt$_0:src1 Fetch Operator limit: -1 - $hdt$_0:$hdt$_1:src4 + $hdt$_0:$hdt$_2:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:$hdt$_0:src1 + $hdt$_0:$hdt$_0:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -40,9 +40,9 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - $hdt$_0:$hdt$_1:src4 + $hdt$_0:$hdt$_2:src2 TableScan - alias: src4 + alias: src2 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -51,7 +51,7 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -61,7 +61,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src2 + alias: src4 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git ql/src/test/results/clientpositive/correlationoptimizer5.q.out ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 2e9e6027ae..1505d7ce96 100644 --- ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -126,24 +126,25 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -166,8 +167,8 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -180,35 +181,35 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -233,25 +234,24 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -274,18 +274,14 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co1 SELECT b.key, d.val @@ -353,24 +349,25 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -387,25 +384,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -430,7 +426,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1, _col2 Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE Mux Operator Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE @@ -438,12 +434,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col3 Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -460,34 +456,30 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 + outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 220 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + Mux Operator + Statistics: Num rows: 8 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 8 Data size: 484 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 Stage: Stage-0 Move Operator @@ -577,13 +569,13 @@ STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:y + $hdt$_1:m Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:y + $hdt$_1:m TableScan - alias: y + alias: m filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -602,24 +594,24 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: x + alias: n filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -644,7 +636,7 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Stage: Stage-7 @@ -655,17 +647,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -700,7 +692,7 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Stage: Stage-8 @@ -711,17 +703,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -736,35 +728,35 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: string) + expressions: _col3 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -774,13 +766,13 @@ STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:m + $hdt$_2:$hdt$_3:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:m + $hdt$_2:$hdt$_3:y TableScan - alias: m + alias: y filterExpr: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -799,34 +791,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: n + alias: x filterExpr: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), val (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index 74a7aa89e7..7bce5e9667 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -297,12 +297,12 @@ POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -353,20 +353,23 @@ STAGE PLANS: Filter Operator predicate: (((_col1 + _col4) > 2.0) or _col2) (type: boolean) Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 485 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: t1 filterExpr: (UDFToDouble(key) = 1.0D) (type: boolean) @@ -377,20 +380,13 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 5370 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -398,15 +394,19 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col5 Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col5 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join22.q.out ql/src/test/results/clientpositive/join22.q.out index ad34bc4310..109779db50 100644 --- ql/src/test/results/clientpositive/join22.q.out +++ ql/src/test/results/clientpositive/join22.q.out @@ -36,7 +36,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) TableScan - alias: src2 + alias: src4 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -80,7 +80,7 @@ STAGE PLANS: Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) TableScan - alias: src4 + alias: src2 filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -89,13 +89,13 @@ STAGE PLANS: Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 177500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/llap/join_reorder5.q.out ql/src/test/results/clientpositive/llap/join_reorder5.q.out new file mode 100644 index 0000000000..5cc02876f2 --- /dev/null +++ ql/src/test/results/clientpositive/llap/join_reorder5.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: create table web_sales ( + ws_order_number int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_sales +POSTHOOK: query: create table web_sales ( + ws_order_number int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_sales +PREHOOK: query: create table web_returns ( + wr_order_number int, + wr_returned_time_sk timestamp, + wr_return_quantity int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@web_returns +POSTHOOK: query: create table web_returns ( + wr_order_number int, + wr_returned_time_sk timestamp, + wr_return_quantity int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@web_returns +PREHOOK: query: explain cbo +with ws_wh as + (select ws1.ws_order_number + from web_sales ws1,web_returns wr2 + where ws1.ws_order_number = wr2.wr_order_number) +select + ws_order_number +from + web_sales ws1 +where +ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +PREHOOK: type: QUERY +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +with ws_wh as + (select ws1.ws_order_number + from web_sales ws1,web_returns wr2 + where ws1.ws_order_number = wr2.wr_order_number) +select + ws_order_number +from + web_sales ws1 +where +ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@web_returns +POSTHOOK: Input: default@web_sales +#### A masked pattern was here #### +CBO PLAN: +HiveProject(ws_order_number=[$0]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[semi]) + HiveProject(ws_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(wr_order_number=[$0]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveProject(ws_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(wr_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[wr2]) + diff --git ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index 9547e4fa7c..406f3d3f82 100644 --- ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -35,32 +35,32 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-18 is a root stage - Stage-13 depends on stages: Stage-18 - Stage-12 depends on stages: Stage-13 , consists of Stage-16, Stage-17, Stage-1 - Stage-16 has a backup stage: Stage-1 - Stage-10 depends on stages: Stage-16 - Stage-9 depends on stages: Stage-1, Stage-10, Stage-11 , consists of Stage-14, Stage-15, Stage-2 - Stage-14 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-14 - Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-15 has a backup stage: Stage-2 + Stage-19 is a root stage + Stage-14 depends on stages: Stage-19 + Stage-13 depends on stages: Stage-14 , consists of Stage-17, Stage-18, Stage-2 + Stage-17 has a backup stage: Stage-2 + Stage-11 depends on stages: Stage-17 + Stage-10 depends on stages: Stage-2, Stage-11, Stage-12 , consists of Stage-15, Stage-16, Stage-3 + Stage-15 has a backup stage: Stage-3 Stage-8 depends on stages: Stage-15 + Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 + Stage-16 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-16 + Stage-3 + Stage-18 has a backup stage: Stage-2 + Stage-12 depends on stages: Stage-18 Stage-2 - Stage-17 has a backup stage: Stage-1 - Stage-11 depends on stages: Stage-17 - Stage-1 - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-18 + Stage: Stage-19 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_3:t1_n94 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_3:t1_n94 + $hdt$_3:t1_n94 TableScan alias: t1_n94 filterExpr: key is not null (type: boolean) @@ -77,7 +77,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -109,26 +109,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 + Stage: Stage-13 Conditional Operator - Stage: Stage-16 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + $hdt$_1:src1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-10 - Map Reduce - Map Operator Tree: + $hdt$_1:src1 TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -140,28 +131,37 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src2 @@ -182,10 +182,10 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -193,7 +193,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -212,7 +212,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -236,7 +236,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -247,10 +247,10 @@ STAGE PLANS: TableScan HashTable Sink Operator keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -268,7 +268,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -287,15 +287,15 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col2 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE TableScan alias: src2 @@ -319,7 +319,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) + 0 _col2 (type: string) 1 _col0 (type: string) Statistics: Num rows: 665 Data size: 57898 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -335,14 +335,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:src1 + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:src1 + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -354,36 +363,34 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - HashTable Sink Operator + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col2 + Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE TableScan alias: src1 filterExpr: key is not null (type: boolean) @@ -401,13 +408,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -415,7 +415,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0 + outputColumnNames: _col2 Statistics: Num rows: 605 Data size: 52635 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/smb_mapjoin_25.q.out index 8fb82e1659..790ed2ef41 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_25.q.out @@ -46,9 +46,9 @@ POSTHOOK: query: load data local inpath '../../data/files/smb_rc3/000000_0' over POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket_3_n4 -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product -Warning: Shuffle Join JOIN[14][tables = [$hdt$_2, $hdt$_3]] in Stage 'Stage-4:MAPRED' is a cross product +Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Stage-3:MAPRED' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -65,9 +65,9 @@ POSTHOOK: Input: default@smb_bucket_3_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-4 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -87,7 +87,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TableScan - alias: b + alias: d filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -122,34 +122,6 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: TableScan alias: c filterExpr: (key = 5) (type: boolean) @@ -163,8 +135,31 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan - alias: d + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -183,13 +178,18 @@ STAGE PLANS: keys: 0 1 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -197,11 +197,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[39][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-10:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product PREHOOK: query: explain select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY @@ -217,29 +215,27 @@ POSTHOOK: Input: default@smb_bucket_2_n4 POSTHOOK: Input: default@smb_bucket_3_n4 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-13 is a root stage - Stage-9 depends on stages: Stage-13 - Stage-8 depends on stages: Stage-9, Stage-10 , consists of Stage-11, Stage-12, Stage-2 - Stage-11 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-11 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-2 - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + Stage-10 is a root stage + Stage-7 depends on stages: Stage-10 + Stage-0 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-13 + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:b + $hdt$_0:a + Fetch Operator + limit: -1 + $hdt$_2:c + Fetch Operator + limit: -1 + $hdt$_3:b Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:b + $hdt$_0:a TableScan - alias: b + alias: a filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -251,12 +247,9 @@ STAGE PLANS: keys: 0 1 - - Stage: Stage-9 - Map Reduce - Map Operator Tree: + $hdt$_2:c TableScan - alias: a + alias: c filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -264,147 +257,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 1 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: + $hdt$_3:b TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-14 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:$hdt$_2:c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_2:c - TableScan - alias: c + alias: b filterExpr: (key = 5) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -417,7 +276,7 @@ STAGE PLANS: 0 1 - Stage: Stage-10 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -436,12 +295,31 @@ STAGE PLANS: 0 1 Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -452,11 +330,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[39][bigTable=?] in task 'Stage-6:MAPRED' is a cross product -Warning: Shuffle Join JOIN[21][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product -Warning: Map Join MAPJOIN[48][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[49][bigTable=?] in task 'Stage-10:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-7:MAPRED' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product PREHOOK: query: select * from (select a.key from smb_bucket_1_n4 a join smb_bucket_2_n4 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2_n4 c join smb_bucket_3_n4 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 PREHOOK: type: QUERY PREHOOK: Input: default@smb_bucket_1_n4