diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
index d508d02ed1ef2c88e8069bd4ee6a6d9b618868b2..6c6908a9e866171d1d41532afb397c1df2097ddd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java
@@ -46,9 +46,6 @@ public PhysicalOptimizer(PhysicalContext pctx, HiveConf hiveConf) {
    */
   private void initialize(HiveConf hiveConf) {
     resolvers = new ArrayList();
-    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
-      resolvers.add(new SkewJoinResolver());
-    }
 
     if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
       resolvers.add(new CommonJoinResolver());
@@ -59,6 +56,9 @@ private void initialize(HiveConf hiveConf) {
         resolvers.add(new SortMergeJoinResolver());
       }
     }
+    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) {
+      resolvers.add(new SkewJoinResolver());
+    }
 
     resolvers.add(new MapJoinResolver());
     if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) {
diff --git a/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_hive.q b/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_hive.q
new file mode 100644
index 0000000000000000000000000000000000000000..ca70ddf7f7636918eef90a3e14fcd4b3a88215ec
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_hive.q
@@ -0,0 +1,132 @@
+CREATE TABLE `tbl1`(
+  `fence` string);
+
+CREATE TABLE `tbl2`(
+  `order_id` string,
+  `phone` string,
+  `search_id` string
+)
+PARTITIONED BY (
+  `dt` string);
+
+
+CREATE TABLE `tbl3`(
+  `order_id` string,
+  `platform` string)
+PARTITIONED BY (
+  `dt` string);
+
+
+CREATE TABLE `tbl4`(
+  `groupname` string,
+  `phone` string)
+PARTITIONED BY (
+  `dt` string);
+
+
+CREATE TABLE `tbl5`(
+  `search_id` string,
+  `fence` string)
+PARTITIONED BY (
+  `dt` string);
+
+SET hive.auto.convert.join = TRUE;
+
+SET hive.optimize.skewjoin = TRUE;
+
+
+EXPLAIN SELECT dt,
+       platform,
+       groupname,
+       count(1) as cnt
+       FROM
+       (SELECT dt,
+               platform,
+               groupname
+        FROM
+          (SELECT fence
+           FROM tbl1)ta
+        JOIN
+          (SELECT a0.dt,
+                  a1.platform,
+                  a2.groupname,
+                  a3.fence
+           FROM
+             (SELECT dt,
+                     order_id,
+                     phone,
+                     search_id
+              FROM tbl2
+              WHERE dt =20180703 )a0
+           JOIN
+             (SELECT order_id,
+                     platform,
+                     dt
+              FROM tbl3
+              WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id
+           INNER JOIN
+             (SELECT groupname,
+                     phone,
+                     dt
+              FROM tbl4
+              WHERE dt =20180703 )a2 ON a0.phone = a2.phone
+           LEFT JOIN
+             (SELECT search_id,
+                     fence,
+                     dt
+              FROM tbl5
+              WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11
+       GROUP BY dt,
+                platform,
+                groupname;
+
+SELECT dt,
+       platform,
+       groupname,
+       count(1) as cnt
+FROM
+(SELECT dt,
+        platform,
+        groupname
+ FROM
+   (SELECT fence
+    FROM tbl1)ta
+ JOIN
+   (SELECT a0.dt,
+           a1.platform,
+           a2.groupname,
+           a3.fence
+    FROM
+      (SELECT dt,
+              order_id,
+              phone,
+              search_id
+       FROM tbl2
+       WHERE dt =20180703 )a0
+    JOIN
+      (SELECT order_id,
+              platform,
+              dt
+       FROM tbl3
+       WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id
+    INNER JOIN
+      (SELECT groupname,
+              phone,
+              dt
+       FROM tbl4
+       WHERE dt =20180703 )a2 ON a0.phone = a2.phone
+    LEFT JOIN
+      (SELECT search_id,
+              fence,
+              dt
+       FROM tbl5
+       WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11
+GROUP BY dt,
+platform,
+groupname;
+
+DROP TABLE tbl1;
+DROP TABLE tbl2;
+DROP TABLE tbl3;
+DROP TABLE tbl4;
+DROP TABLE tbl5;
diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_hive.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_hive.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..ec4c6d19ae781c24ff9deecad5a54affda387c5f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_hive.q.out
@@ -0,0 +1,874 @@
+PREHOOK: query: CREATE TABLE `tbl1`(
+  `fence` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: CREATE TABLE `tbl1`(
+  `fence` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: CREATE TABLE `tbl2`(
+  `order_id` string,
+  `phone` string,
+  `search_id` string
+)
+PARTITIONED BY (
+  `dt` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: CREATE TABLE `tbl2`(
+  `order_id` string,
+  `phone` string,
+  `search_id` string
+)
+PARTITIONED BY (
+  `dt` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: CREATE TABLE `tbl3`(
+  `order_id` string,
+  `platform` string)
+PARTITIONED BY (
+  `dt` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl3
+POSTHOOK: query: CREATE TABLE `tbl3`(
+  `order_id` string,
+  `platform` string)
+PARTITIONED BY (
+  `dt` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl3
+PREHOOK: query: CREATE TABLE `tbl4`(
+  `groupname` string,
+  `phone` string)
+PARTITIONED BY (
+  `dt` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl4
+POSTHOOK: query: CREATE TABLE `tbl4`(
+  `groupname` string,
+  `phone` string)
+PARTITIONED BY (
+  `dt` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl4
+PREHOOK: query: CREATE TABLE `tbl5`(
+  `search_id` string,
+  `fence` string)
+PARTITIONED BY (
+  `dt` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl5
+POSTHOOK: query: CREATE TABLE `tbl5`(
+  `search_id` string,
+  `fence` string)
+PARTITIONED BY (
+  `dt` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl5
+PREHOOK: query: EXPLAIN SELECT dt,
+       platform,
+       groupname,
+       count(1) as cnt
+       FROM
+       (SELECT dt,
+               platform,
+               groupname
+        FROM
+          (SELECT fence
+           FROM tbl1)ta
+        JOIN
+          (SELECT a0.dt,
+                  a1.platform,
+                  a2.groupname,
+                  a3.fence
+           FROM
+             (SELECT dt,
+                     order_id,
+                     phone,
+                     search_id
+              FROM tbl2
+              WHERE dt =20180703 )a0
+           JOIN
+             (SELECT order_id,
+                     platform,
+                     dt
+              FROM tbl3
+              WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id
+           INNER JOIN
+             (SELECT groupname,
+                     phone,
+                     dt
+              FROM tbl4
+              WHERE dt =20180703 )a2 ON a0.phone = a2.phone
+           LEFT JOIN
+             (SELECT search_id,
+                     fence,
+                     dt
+              FROM tbl5
+              WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11
+       GROUP BY dt,
+                platform,
+                groupname
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Input: default@tbl3
+PREHOOK: Input: default@tbl4
+PREHOOK: Input: default@tbl5
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT dt,
+       platform,
+       groupname,
+       count(1) as cnt
+       FROM
+       (SELECT dt,
+               platform,
+               groupname
+        FROM
+          (SELECT fence
+           FROM tbl1)ta
+        JOIN
+          (SELECT a0.dt,
+                  a1.platform,
+                  a2.groupname,
+                  a3.fence
+           FROM
+             (SELECT dt,
+                     order_id,
+                     phone,
+                     search_id
+              FROM tbl2
+              WHERE dt =20180703 )a0
+           JOIN
+             (SELECT order_id,
+                     platform,
+                     dt
+              FROM tbl3
+              WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id
+           INNER JOIN
+             (SELECT groupname,
+                     phone,
+                     dt
+              FROM tbl4
+              WHERE dt =20180703 )a2 ON a0.phone = a2.phone
+           LEFT JOIN
+             (SELECT search_id,
+                     fence,
+                     dt
+              FROM tbl5
+              WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11
+       GROUP BY dt,
+                platform,
+                groupname
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Input: default@tbl3
+POSTHOOK: Input: default@tbl4
+POSTHOOK: Input: default@tbl5
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-18 is a root stage , consists of Stage-24, Stage-25, Stage-3
+  Stage-24 has a backup stage: Stage-3
+  Stage-16 depends on stages: Stage-24
+  Stage-15 depends on stages: Stage-3, Stage-16, Stage-17 , consists of Stage-22, Stage-23, Stage-4
+  Stage-22 has a backup stage: Stage-4
+  Stage-13 depends on stages: Stage-22
+  Stage-12 depends on stages: Stage-4, Stage-13, Stage-14 , consists of Stage-20, Stage-21, Stage-5
+  Stage-20 has a backup stage: Stage-5
+  Stage-10 depends on stages: Stage-20
+  Stage-19 depends on stages: Stage-5, Stage-10, Stage-11
+  Stage-2 depends on stages: Stage-19
+  Stage-21 has a backup stage: Stage-5
+  Stage-11 depends on stages: Stage-21
+  Stage-5
+  Stage-23 has a backup stage: Stage-4
+  Stage-14 depends on stages: Stage-23
+  Stage-4
+  Stage-25 has a backup stage: Stage-3
+  Stage-17 depends on stages: Stage-25
+  Stage-3
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-18
+    Conditional Operator
+
+  Stage: Stage-24
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$hdt$_2:tbl3 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$hdt$_2:tbl3 
+          TableScan
+            alias: tbl3
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: order_id (type: string), platform (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col1 (type: string)
+                    1 _col0 (type: string)
+
+  Stage: Stage-16
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl2
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: dt (type: string), order_id (type: string), phone (type: string), search_id (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col1 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col2, _col3, _col5
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-15
+    Conditional Operator
+
+  Stage: Stage-22
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$hdt$_3:tbl4 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$hdt$_3:tbl4 
+          TableScan
+            alias: tbl4
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: groupname (type: string), phone (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col2 (type: string)
+                    1 _col1 (type: string)
+
+  Stage: Stage-13
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 _col2 (type: string)
+                1 _col1 (type: string)
+              outputColumnNames: _col0, _col3, _col5, _col6
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-12
+    Conditional Operator
+
+  Stage: Stage-20
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$hdt$_4:tbl5 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$hdt$_4:tbl5 
+          TableScan
+            alias: tbl5
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: search_id (type: string), fence (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col3 (type: string)
+                    1 _col0 (type: string)
+
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 _col3 (type: string)
+                1 _col0 (type: string)
+              outputColumnNames: _col0, _col5, _col6, _col9
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string), _col9 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-19
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:tbl1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:tbl1 
+          TableScan
+            alias: tbl1
+            filterExpr: fence is not null (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: fence is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: fence (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col3 (type: string)
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 _col0 (type: string)
+                1 _col3 (type: string)
+              outputColumnNames: _col4, _col5, _col6
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: _col4 (type: string), _col5 (type: string), _col6 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col3 (type: bigint)
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-21
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col3 (type: string)
+                1 _col0 (type: string)
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl5
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: search_id (type: string), fence (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col3 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col5, _col6, _col9
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string), _col9 (type: string)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col3 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col3 (type: string)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string)
+          TableScan
+            alias: tbl5
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: search_id (type: string), fence (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col3 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col5, _col6, _col9
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string), _col9 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-23
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$INTNAME 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$INTNAME 
+          TableScan
+            HashTable Sink Operator
+              keys:
+                0 _col2 (type: string)
+                1 _col1 (type: string)
+
+  Stage: Stage-14
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl4
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: groupname (type: string), phone (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col2 (type: string)
+                    1 _col1 (type: string)
+                  outputColumnNames: _col0, _col3, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col2 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col2 (type: string)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string)
+          TableScan
+            alias: tbl4
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: groupname (type: string), phone (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col2 (type: string)
+            1 _col1 (type: string)
+          outputColumnNames: _col0, _col3, _col5, _col6
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-25
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:$hdt$_1:tbl2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:$hdt$_1:tbl2 
+          TableScan
+            alias: tbl2
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: dt (type: string), order_id (type: string), phone (type: string), search_id (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col1 (type: string)
+                    1 _col0 (type: string)
+
+  Stage: Stage-17
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl3
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: order_id (type: string), platform (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col1 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col2, _col3, _col5
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Execution mode: vectorized
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: tbl2
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: dt (type: string), order_id (type: string), phone (type: string), search_id (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string)
+          TableScan
+            alias: tbl3
+            filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: order_id (type: string), platform (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col1 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col2, _col3, _col5
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT dt,
+       platform,
+       groupname,
+       count(1) as cnt
+FROM
+(SELECT dt,
+        platform,
+        groupname
+ FROM
+   (SELECT fence
+    FROM tbl1)ta
+ JOIN
+   (SELECT a0.dt,
+           a1.platform,
+           a2.groupname,
+           a3.fence
+    FROM
+      (SELECT dt,
+              order_id,
+              phone,
+              search_id
+       FROM tbl2
+       WHERE dt =20180703 )a0
+    JOIN
+      (SELECT order_id,
+              platform,
+              dt
+       FROM tbl3
+       WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id
+    INNER JOIN
+      (SELECT groupname,
+              phone,
+              dt
+       FROM tbl4
+       WHERE dt =20180703 )a2 ON a0.phone = a2.phone
+    LEFT JOIN
+      (SELECT search_id,
+              fence,
+              dt
+       FROM tbl5
+       WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11
+GROUP BY dt,
+platform,
+groupname
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+PREHOOK: Input: default@tbl3
+PREHOOK: Input: default@tbl4
+PREHOOK: Input: default@tbl5
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dt,
+       platform,
+       groupname,
+       count(1) as cnt
+FROM
+(SELECT dt,
+        platform,
+        groupname
+ FROM
+   (SELECT fence
+    FROM tbl1)ta
+ JOIN
+   (SELECT a0.dt,
+           a1.platform,
+           a2.groupname,
+           a3.fence
+    FROM
+      (SELECT dt,
+              order_id,
+              phone,
+              search_id
+       FROM tbl2
+       WHERE dt =20180703 )a0
+    JOIN
+      (SELECT order_id,
+              platform,
+              dt
+       FROM tbl3
+       WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id
+    INNER JOIN
+      (SELECT groupname,
+              phone,
+              dt
+       FROM tbl4
+       WHERE dt =20180703 )a2 ON a0.phone = a2.phone
+    LEFT JOIN
+      (SELECT search_id,
+              fence,
+              dt
+       FROM tbl5
+       WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11
+GROUP BY dt,
+platform,
+groupname
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Input: default@tbl3
+POSTHOOK: Input: default@tbl4
+POSTHOOK: Input: default@tbl5
+#### A masked pattern was here ####
+PREHOOK: query: DROP TABLE tbl1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl1
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: DROP TABLE tbl1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: DROP TABLE tbl2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl2
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: DROP TABLE tbl2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl2
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: DROP TABLE tbl3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl3
+PREHOOK: Output: default@tbl3
+POSTHOOK: query: DROP TABLE tbl3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl3
+POSTHOOK: Output: default@tbl3
+PREHOOK: query: DROP TABLE tbl4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl4
+PREHOOK: Output: default@tbl4
+POSTHOOK: query: DROP TABLE tbl4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl4
+POSTHOOK: Output: default@tbl4
+PREHOOK: query: DROP TABLE tbl5
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl5
+PREHOOK: Output: default@tbl5
+POSTHOOK: query: DROP TABLE tbl5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl5
+POSTHOOK: Output: default@tbl5
diff --git a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out
index ee8dbc2f3dd6770449d1f306ad6ff0448a2559e4..972f6a801c8d3974700aef08ae790279fed85dc6 100644
--- a/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out
+++ b/ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out
@@ -539,7 +539,7 @@ STAGE PLANS:
   Stage: Stage-10
     Conditional Operator
 
-  Stage: Stage-28
+  Stage: Stage-15
     Map Reduce Local Work
       Alias -> Map Local Tables:
         1 
@@ -819,3 +819,4 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@t1_n94
 #### A masked pattern was here ####
+3