diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index d508d02ed1..6c6908a9e8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -46,9 +46,6 @@ public PhysicalOptimizer(PhysicalContext pctx, HiveConf hiveConf) { */ private void initialize(HiveConf hiveConf) { resolvers = new ArrayList(); - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { - resolvers.add(new SkewJoinResolver()); - } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { resolvers.add(new CommonJoinResolver()); @@ -59,6 +56,9 @@ private void initialize(HiveConf hiveConf) { resolvers.add(new SortMergeJoinResolver()); } } + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { + resolvers.add(new SkewJoinResolver()); + } resolvers.add(new MapJoinResolver()); if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index c1f3401f42..bf9422ab81 100644 --- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -29,102 +29,99 @@ JOIN ON a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-18 is a root stage , consists of Stage-27, Stage-28, Stage-2 - Stage-27 has a backup stage: Stage-2 - Stage-16 depends on stages: Stage-27 - Stage-8 depends on stages: Stage-2, Stage-13, Stage-14, Stage-16, Stage-17, Stage-22, Stage-23 , consists of Stage-26, Stage-3 - Stage-26 - Stage-7 depends on stages: Stage-26 - Stage-3 depends on stages: Stage-7 - Stage-28 has a backup stage: Stage-2 - Stage-17 depends on stages: Stage-28 + Stage-12 is a root stage , consists of Stage-16, Stage-17, Stage-1 + Stage-16 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-16 + Stage-9 depends on stages: Stage-1, Stage-10, Stage-11, Stage-13 , consists of Stage-14, Stage-15, Stage-2 + Stage-14 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 + Stage-15 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-15 Stage-2 - Stage-21 is a root stage , consists of Stage-32, Stage-33, Stage-1 - Stage-32 has a backup stage: Stage-1 - Stage-19 depends on stages: Stage-32 - Stage-10 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-31, Stage-2 - Stage-31 - Stage-9 depends on stages: Stage-31 - Stage-15 depends on stages: Stage-9, Stage-11 , consists of Stage-29, Stage-30, Stage-2 - Stage-29 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-29 - Stage-30 has a backup stage: Stage-2 - Stage-14 depends on stages: Stage-30 - Stage-33 has a backup stage: Stage-1 - Stage-20 depends on stages: Stage-33 + Stage-17 has a backup stage: Stage-1 + Stage-11 depends on stages: Stage-17 Stage-1 - Stage-24 is a root stage , consists of Stage-34, Stage-35, Stage-2 - Stage-34 has a backup stage: Stage-2 - Stage-22 depends on stages: Stage-34 - Stage-35 has a backup stage: Stage-2 - Stage-23 depends on stages: Stage-35 - Stage-37 is a root stage - Stage-25 depends on stages: Stage-37 - Stage-12 depends on stages: Stage-25 , consists of Stage-36, Stage-2 - Stage-36 - Stage-11 depends on stages: Stage-36 + Stage-18 is a root stage + Stage-13 depends on stages: Stage-18 Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-18 + Stage: Stage-12 Conditional Operator - Stage: Stage-27 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $hdt$_0:$hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $hdt$_0:$hdt$_1:src2 TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-16 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-26 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - 1 + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - 1 + $INTNAME1 TableScan HashTable Sink Operator keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-7 Map Reduce @@ -134,8 +131,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -174,7 +172,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-28 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -188,7 +186,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-17 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -233,7 +231,6 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -250,186 +247,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-21 - Conditional Operator - - Stage: Stage-32 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src2 - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-19 - Map Reduce - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-10 - Conditional Operator - - Stage: Stage-31 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-15 - Conditional Operator - - Stage: Stage-29 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-30 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-33 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:src1 @@ -453,7 +271,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-20 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -524,7 +342,6 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -537,90 +354,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-24 - Conditional Operator - - Stage: Stage-34 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-22 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-35 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-37 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:$hdt$_2:t1_n94 @@ -644,7 +378,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-25 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -676,43 +410,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 - Conditional Operator - - Stage: Stage-36 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 @@ -737,3 +434,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### +3