diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index d508d02ed1..6c6908a9e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -46,9 +46,6 @@ public PhysicalOptimizer(PhysicalContext pctx, HiveConf hiveConf) { */ private void initialize(HiveConf hiveConf) { resolvers = new ArrayList(); - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { - resolvers.add(new SkewJoinResolver()); - } if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { resolvers.add(new CommonJoinResolver()); @@ -59,6 +56,9 @@ private void initialize(HiveConf hiveConf) { resolvers.add(new SortMergeJoinResolver()); } } + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { + resolvers.add(new SkewJoinResolver()); + } resolvers.add(new MapJoinResolver()); if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEMETADATAONLYQUERIES)) { diff --git ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_hive.q ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_hive.q new file mode 100644 index 0000000000..ca70ddf7f7 --- /dev/null +++ ql/src/test/queries/clientpositive/runtime_skewjoin_mapjoin_hive.q @@ -0,0 +1,132 @@ +CREATE TABLE `tbl1`( + `fence` string); + +CREATE TABLE `tbl2`( + `order_id` string, + `phone` string, + `search_id` string +) +PARTITIONED BY ( + `dt` string); + + +CREATE TABLE `tbl3`( + `order_id` string, + `platform` string) +PARTITIONED BY ( + `dt` string); + + +CREATE TABLE `tbl4`( + `groupname` string, + `phone` string) +PARTITIONED BY ( + `dt` string); + + +CREATE TABLE `tbl5`( + `search_id` string, + `fence` string) +PARTITIONED BY ( + `dt` string); + +SET hive.auto.convert.join = TRUE; + +SET hive.optimize.skewjoin = TRUE; + + +EXPLAIN SELECT dt, + platform, + groupname, + count(1) as cnt + FROM + (SELECT dt, + platform, + groupname + FROM + (SELECT fence + FROM tbl1)ta + JOIN + (SELECT a0.dt, + a1.platform, + a2.groupname, + a3.fence + FROM + (SELECT dt, + order_id, + phone, + search_id + FROM tbl2 + WHERE dt =20180703 )a0 + JOIN + (SELECT order_id, + platform, + dt + FROM tbl3 + WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id + INNER JOIN + (SELECT groupname, + phone, + dt + FROM tbl4 + WHERE dt =20180703 )a2 ON a0.phone = a2.phone + LEFT JOIN + (SELECT search_id, + fence, + dt + FROM tbl5 + WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11 + GROUP BY dt, + platform, + groupname; + +SELECT dt, + platform, + groupname, + count(1) as cnt +FROM +(SELECT dt, + platform, + groupname + FROM + (SELECT fence + FROM tbl1)ta + JOIN + (SELECT a0.dt, + a1.platform, + a2.groupname, + a3.fence + FROM + (SELECT dt, + order_id, + phone, + search_id + FROM tbl2 + WHERE dt =20180703 )a0 + JOIN + (SELECT order_id, + platform, + dt + FROM tbl3 + WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id + INNER JOIN + (SELECT groupname, + phone, + dt + FROM tbl4 + WHERE dt =20180703 )a2 ON a0.phone = a2.phone + LEFT JOIN + (SELECT search_id, + fence, + dt + FROM tbl5 + WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11 +GROUP BY dt, +platform, +groupname; + +DROP TABLE tbl1; +DROP TABLE tbl2; +DROP TABLE tbl3; +DROP TABLE tbl4; +DROP TABLE tbl5; diff --git ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_hive.q.out ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_hive.q.out new file mode 100644 index 0000000000..af43fbb0f9 --- /dev/null +++ ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_hive.q.out @@ -0,0 +1,862 @@ +PREHOOK: query: CREATE TABLE `tbl1`( + `fence` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: CREATE TABLE `tbl1`( + `fence` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE `tbl2`( + `order_id` string, + `phone` string, + `search_id` string +) +PARTITIONED BY ( + `dt` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE `tbl2`( + `order_id` string, + `phone` string, + `search_id` string +) +PARTITIONED BY ( + `dt` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: CREATE TABLE `tbl3`( + `order_id` string, + `platform` string) +PARTITIONED BY ( + `dt` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl3 +POSTHOOK: query: CREATE TABLE `tbl3`( + `order_id` string, + `platform` string) +PARTITIONED BY ( + `dt` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl3 +PREHOOK: query: CREATE TABLE `tbl4`( + `groupname` string, + `phone` string) +PARTITIONED BY ( + `dt` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl4 +POSTHOOK: query: CREATE TABLE `tbl4`( + `groupname` string, + `phone` string) +PARTITIONED BY ( + `dt` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl4 +PREHOOK: query: CREATE TABLE `tbl5`( + `search_id` string, + `fence` string) +PARTITIONED BY ( + `dt` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl5 +POSTHOOK: query: CREATE TABLE `tbl5`( + `search_id` string, + `fence` string) +PARTITIONED BY ( + `dt` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl5 +PREHOOK: query: EXPLAIN SELECT dt, + platform, + groupname, + count(1) as cnt + FROM + (SELECT dt, + platform, + groupname + FROM + (SELECT fence + FROM tbl1)ta + JOIN + (SELECT a0.dt, + a1.platform, + a2.groupname, + a3.fence + FROM + (SELECT dt, + order_id, + phone, + search_id + FROM tbl2 + WHERE dt =20180703 )a0 + JOIN + (SELECT order_id, + platform, + dt + FROM tbl3 + WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id + INNER JOIN + (SELECT groupname, + phone, + dt + FROM tbl4 + WHERE dt =20180703 )a2 ON a0.phone = a2.phone + LEFT JOIN + (SELECT search_id, + fence, + dt + FROM tbl5 + WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11 + GROUP BY dt, + platform, + groupname +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT dt, + platform, + groupname, + count(1) as cnt + FROM + (SELECT dt, + platform, + groupname + FROM + (SELECT fence + FROM tbl1)ta + JOIN + (SELECT a0.dt, + a1.platform, + a2.groupname, + a3.fence + FROM + (SELECT dt, + order_id, + phone, + search_id + FROM tbl2 + WHERE dt =20180703 )a0 + JOIN + (SELECT order_id, + platform, + dt + FROM tbl3 + WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id + INNER JOIN + (SELECT groupname, + phone, + dt + FROM tbl4 + WHERE dt =20180703 )a2 ON a0.phone = a2.phone + LEFT JOIN + (SELECT search_id, + fence, + dt + FROM tbl5 + WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11 + GROUP BY dt, + platform, + groupname +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-19 is a root stage , consists of Stage-25, Stage-26, Stage-1 + Stage-25 has a backup stage: Stage-1 + Stage-17 depends on stages: Stage-25 + Stage-16 depends on stages: Stage-1, Stage-17, Stage-18 , consists of Stage-23, Stage-24, Stage-2 + Stage-23 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-23 + Stage-13 depends on stages: Stage-2, Stage-14, Stage-15 , consists of Stage-21, Stage-22, Stage-3 + Stage-21 has a backup stage: Stage-3 + Stage-11 depends on stages: Stage-21 + Stage-20 depends on stages: Stage-3, Stage-11, Stage-12 + Stage-5 depends on stages: Stage-20 + Stage-22 has a backup stage: Stage-3 + Stage-12 depends on stages: Stage-22 + Stage-3 + Stage-24 has a backup stage: Stage-2 + Stage-15 depends on stages: Stage-24 + Stage-2 + Stage-26 has a backup stage: Stage-1 + Stage-18 depends on stages: Stage-26 + Stage-1 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-19 + Conditional Operator + + Stage: Stage-25 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:tbl3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:tbl3 + TableScan + alias: tbl3 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: order_id (type: string), platform (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl2 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: dt (type: string), order_id (type: string), phone (type: string), search_id (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-16 + Conditional Operator + + Stage: Stage-23 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_2:tbl4 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_2:tbl4 + TableScan + alias: tbl4 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: groupname (type: string), phone (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-13 + Conditional Operator + + Stage: Stage-21 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_3:tbl5 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_3:tbl5 + TableScan + alias: tbl5 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: search_id (type: string), fence (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col5, _col6, _col9 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string), _col9 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-20 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:tbl1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_1:tbl1 + TableScan + alias: tbl1 + filterExpr: fence is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: fence is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: fence (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-22 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl5 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: search_id (type: string), fence (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col5, _col6, _col9 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string), _col9 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + TableScan + alias: tbl5 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and fence is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: search_id (type: string), fence (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col5, _col6, _col9 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string), _col9 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-24 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl4 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: groupname (type: string), phone (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) + TableScan + alias: tbl4 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and phone is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: groupname (type: string), phone (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col5, _col6 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-26 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:tbl2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:tbl2 + TableScan + alias: tbl2 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: dt (type: string), order_id (type: string), phone (type: string), search_id (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl3 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: order_id (type: string), platform (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl2 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null and phone is not null and search_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: dt (type: string), order_id (type: string), phone (type: string), search_id (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + TableScan + alias: tbl3 + filterExpr: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((UDFToDouble(dt) = 2.0180703E7D) and order_id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: order_id (type: string), platform (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT dt, + platform, + groupname, + count(1) as cnt +FROM +(SELECT dt, + platform, + groupname + FROM + (SELECT fence + FROM tbl1)ta + JOIN + (SELECT a0.dt, + a1.platform, + a2.groupname, + a3.fence + FROM + (SELECT dt, + order_id, + phone, + search_id + FROM tbl2 + WHERE dt =20180703 )a0 + JOIN + (SELECT order_id, + platform, + dt + FROM tbl3 + WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id + INNER JOIN + (SELECT groupname, + phone, + dt + FROM tbl4 + WHERE dt =20180703 )a2 ON a0.phone = a2.phone + LEFT JOIN + (SELECT search_id, + fence, + dt + FROM tbl5 + WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11 +GROUP BY dt, +platform, +groupname +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Input: default@tbl3 +PREHOOK: Input: default@tbl4 +PREHOOK: Input: default@tbl5 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dt, + platform, + groupname, + count(1) as cnt +FROM +(SELECT dt, + platform, + groupname + FROM + (SELECT fence + FROM tbl1)ta + JOIN + (SELECT a0.dt, + a1.platform, + a2.groupname, + a3.fence + FROM + (SELECT dt, + order_id, + phone, + search_id + FROM tbl2 + WHERE dt =20180703 )a0 + JOIN + (SELECT order_id, + platform, + dt + FROM tbl3 + WHERE dt =20180703 )a1 ON a0.order_id = a1.order_id + INNER JOIN + (SELECT groupname, + phone, + dt + FROM tbl4 + WHERE dt =20180703 )a2 ON a0.phone = a2.phone + LEFT JOIN + (SELECT search_id, + fence, + dt + FROM tbl5 + WHERE dt =20180703)a3 ON a0.search_id = a3.search_id)t0 ON ta.fence = t0.fence)t11 +GROUP BY dt, +platform, +groupname +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Input: default@tbl3 +POSTHOOK: Input: default@tbl4 +POSTHOOK: Input: default@tbl5 +#### A masked pattern was here #### +PREHOOK: query: DROP TABLE tbl1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl1 +PREHOOK: Output: default@tbl1 +POSTHOOK: query: DROP TABLE tbl1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl1 +POSTHOOK: Output: default@tbl1 +PREHOOK: query: DROP TABLE tbl2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@tbl2 +POSTHOOK: query: DROP TABLE tbl2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@tbl2 +PREHOOK: query: DROP TABLE tbl3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl3 +PREHOOK: Output: default@tbl3 +POSTHOOK: query: DROP TABLE tbl3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl3 +POSTHOOK: Output: default@tbl3 +PREHOOK: query: DROP TABLE tbl4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl4 +PREHOOK: Output: default@tbl4 +POSTHOOK: query: DROP TABLE tbl4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl4 +POSTHOOK: Output: default@tbl4 +PREHOOK: query: DROP TABLE tbl5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tbl5 +PREHOOK: Output: default@tbl5 +POSTHOOK: query: DROP TABLE tbl5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tbl5 +POSTHOOK: Output: default@tbl5 diff --git ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out index c1f3401f42..bf9422ab81 100644 --- ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out +++ ql/src/test/results/clientpositive/runtime_skewjoin_mapjoin_spark.q.out @@ -29,102 +29,99 @@ JOIN ON a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-18 is a root stage , consists of Stage-27, Stage-28, Stage-2 - Stage-27 has a backup stage: Stage-2 - Stage-16 depends on stages: Stage-27 - Stage-8 depends on stages: Stage-2, Stage-13, Stage-14, Stage-16, Stage-17, Stage-22, Stage-23 , consists of Stage-26, Stage-3 - Stage-26 - Stage-7 depends on stages: Stage-26 - Stage-3 depends on stages: Stage-7 - Stage-28 has a backup stage: Stage-2 - Stage-17 depends on stages: Stage-28 + Stage-12 is a root stage , consists of Stage-16, Stage-17, Stage-1 + Stage-16 has a backup stage: Stage-1 + Stage-10 depends on stages: Stage-16 + Stage-9 depends on stages: Stage-1, Stage-10, Stage-11, Stage-13 , consists of Stage-14, Stage-15, Stage-2 + Stage-14 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 + Stage-15 has a backup stage: Stage-2 + Stage-8 depends on stages: Stage-15 Stage-2 - Stage-21 is a root stage , consists of Stage-32, Stage-33, Stage-1 - Stage-32 has a backup stage: Stage-1 - Stage-19 depends on stages: Stage-32 - Stage-10 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-31, Stage-2 - Stage-31 - Stage-9 depends on stages: Stage-31 - Stage-15 depends on stages: Stage-9, Stage-11 , consists of Stage-29, Stage-30, Stage-2 - Stage-29 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-29 - Stage-30 has a backup stage: Stage-2 - Stage-14 depends on stages: Stage-30 - Stage-33 has a backup stage: Stage-1 - Stage-20 depends on stages: Stage-33 + Stage-17 has a backup stage: Stage-1 + Stage-11 depends on stages: Stage-17 Stage-1 - Stage-24 is a root stage , consists of Stage-34, Stage-35, Stage-2 - Stage-34 has a backup stage: Stage-2 - Stage-22 depends on stages: Stage-34 - Stage-35 has a backup stage: Stage-2 - Stage-23 depends on stages: Stage-35 - Stage-37 is a root stage - Stage-25 depends on stages: Stage-37 - Stage-12 depends on stages: Stage-25 , consists of Stage-36, Stage-2 - Stage-36 - Stage-11 depends on stages: Stage-36 + Stage-18 is a root stage + Stage-13 depends on stages: Stage-18 Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-18 + Stage: Stage-12 Conditional Operator - Stage: Stage-27 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME1 + $hdt$_0:$hdt$_1:src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME1 + $hdt$_0:$hdt$_1:src2 TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-16 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Execution mode: vectorized Local Work: Map Reduce Local Work - Stage: Stage-8 + Stage: Stage-9 Conditional Operator - Stage: Stage-26 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - 1 + $INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - 1 + $INTNAME1 TableScan HashTable Sink Operator keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) Stage: Stage-7 Map Reduce @@ -134,8 +131,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -174,7 +172,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-28 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -188,7 +186,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-17 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -233,7 +231,6 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -250,186 +247,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-21 - Conditional Operator - - Stage: Stage-32 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src2 - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-19 - Map Reduce - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-10 - Conditional Operator - - Stage: Stage-31 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-15 - Conditional Operator - - Stage: Stage-29 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-30 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-33 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:src1 @@ -453,7 +271,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-20 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -524,7 +342,6 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 - handleSkewJoin: true keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -537,90 +354,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-24 - Conditional Operator - - Stage: Stage-34 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-22 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-35 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-37 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:$hdt$_2:t1_n94 @@ -644,7 +378,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-25 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -676,43 +410,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-12 - Conditional Operator - - Stage: Stage-36 - Map Reduce Local Work - Alias -> Map Local Tables: - 1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - 1 - TableScan - HashTable Sink Operator - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 reducesinkkey0 (type: string) - 1 reducesinkkey0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 @@ -737,3 +434,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_n94 #### A masked pattern was here #### +3