diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 82a8cda735..83dcbafdca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9410,6 +9410,25 @@ boolean continueJoinMerge() { return true; } + boolean shouldMerge(final QBJoinTree node, final QBJoinTree target) { + boolean isNodeOuterJoin=false, isNodeSemiJoin=false, hasNodePostJoinFilters=false; + boolean isTargetOuterJoin=false, isTargetSemiJoin=false, hasTargetPostJoinFilters=false; + + isNodeOuterJoin = !node.getNoOuterJoin(); + isNodeSemiJoin= !node.getNoSemiJoin(); + hasNodePostJoinFilters = node.getPostJoinFilters().size() !=0; + + isTargetOuterJoin = !target.getNoOuterJoin(); + isTargetSemiJoin= !target.getNoSemiJoin(); + hasTargetPostJoinFilters = target.getPostJoinFilters().size() !=0; + + if((hasNodePostJoinFilters && (isNodeOuterJoin || isNodeSemiJoin)) + || (hasTargetPostJoinFilters && (isTargetOuterJoin || isTargetSemiJoin))) { + return false; + } + return true; + } + // try merge join tree from inner most source // (it was merged from outer most to inner, which could be invalid) // @@ -9446,8 +9465,7 @@ private void mergeJoinTree(QB qb) { if (prevType != null && prevType != currType) { break; } - if ((!node.getNoOuterJoin() && node.getPostJoinFilters().size() != 0) || - (!target.getNoOuterJoin() && target.getPostJoinFilters().size() != 0)) { + if(!shouldMerge(node, target)) { // Outer joins with post-filtering conditions cannot be merged break; } diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q index 86305ad664..780647d082 100644 --- a/ql/src/test/queries/clientpositive/subquery_multi.q +++ b/ql/src/test/queries/clientpositive/subquery_multi.q @@ -126,6 +126,17 @@ select count(*) from src or src.value is not null or exists(select key from src); +-- EXISTS and NOT EXISTS with non-equi predicate +explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name); +select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name); + + drop table tnull; drop table tempty; drop table part_null; diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 216bc23786..67a22c812f 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -4161,6 +4161,185 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 500 +PREHOOK: query: explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: ws2 + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_retailprice is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: wr1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + residual filter predicates: {(_col7 <> _col10)} + Statistics: Num rows: 14 Data size: 8778 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 15 Data size: 9532 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col10 is null (type: boolean) + Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null +#### A masked pattern was here #### +POSTHOOK: query: select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: drop table tnull PREHOOK: type: DROPTABLE PREHOOK: Input: default@tnull diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out index fd31b22e7a..1b763e278e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out @@ -69,7 +69,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 10 Map Operator Tree: TableScan alias: call_center @@ -92,7 +92,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -114,12 +114,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 24) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 723), Reducer 12 (PARTITION-LEVEL SORT, 723), Reducer 2 (PARTITION-LEVEL SORT, 723) - Reducer 4 <- Reducer 3 (GROUP, 447) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 13 <- Map 12 (GROUP, 24) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 9 (PARTITION-LEVEL SORT, 494) + Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 711), Reducer 2 (PARTITION-LEVEL SORT, 711) + Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 459), Reducer 3 (PARTITION-LEVEL SORT, 459) + Reducer 5 <- Reducer 4 (GROUP, 246) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -142,7 +143,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 7 + 1 Map 8 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -152,7 +153,7 @@ STAGE PLANS: value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Local Work: Map Reduce Local Work - Map 10 + Map 11 Map Operator Tree: TableScan alias: cs2 @@ -175,7 +176,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 11 + Map 12 Map Operator Tree: TableScan alias: cr1 @@ -193,7 +194,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -210,7 +211,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 13 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -247,7 +248,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: - 1 Map 9 + 1 Map 10 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int) @@ -260,45 +261,59 @@ STAGE PLANS: Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col4 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16 + outputColumnNames: _col3, _col4, _col5, _col6, _col14 residual filter predicates: {(_col3 <> _col14)} - Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean) - outputColumnNames: _col4, _col5, _col6, _col16 - Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col16 is null (type: boolean) - Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col4, _col5, _col6 - Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5), sum(_col6) - keys: _col4 (type: int) - mode: hash - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col14 + Statistics: Num rows: 463810558 Data size: 62809267017 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col14 is null (type: boolean) + Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6 + Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231905279 Data size: 31404633508 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) mode: partial2 @@ -308,7 +323,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -325,7 +340,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 6 + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out index 995875aa84..39c4476a5c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out @@ -65,7 +65,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 9 + Map 10 Map Operator Tree: TableScan alias: web_site @@ -88,7 +88,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 8 Map Operator Tree: TableScan alias: date_dim @@ -110,12 +110,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 11) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 362), Reducer 12 (PARTITION-LEVEL SORT, 362), Reducer 2 (PARTITION-LEVEL SORT, 362) - Reducer 4 <- Reducer 3 (GROUP, 224) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 13 <- Map 12 (GROUP, 11) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 9 (PARTITION-LEVEL SORT, 327) + Reducer 3 <- Map 11 (PARTITION-LEVEL SORT, 357), Reducer 2 (PARTITION-LEVEL SORT, 357) + Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 230), Reducer 3 (PARTITION-LEVEL SORT, 230) + Reducer 5 <- Reducer 4 (GROUP, 124) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -138,7 +139,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 7 + 1 Map 8 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -148,7 +149,7 @@ STAGE PLANS: value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Local Work: Map Reduce Local Work - Map 10 + Map 11 Map Operator Tree: TableScan alias: ws2 @@ -171,7 +172,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 11 + Map 12 Map Operator Tree: TableScan alias: wr1 @@ -189,7 +190,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: customer_address @@ -206,7 +207,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 13 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -243,7 +244,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: - 1 Map 9 + 1 Map 10 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int) @@ -256,45 +257,59 @@ STAGE PLANS: Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 keys: 0 _col4 (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16 + outputColumnNames: _col3, _col4, _col5, _col6, _col14 residual filter predicates: {(_col3 <> _col14)} - Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean) - outputColumnNames: _col4, _col5, _col6, _col16 - Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col16 is null (type: boolean) - Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) - outputColumnNames: _col4, _col5, _col6 - Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col5), sum(_col6) - keys: _col4 (type: int) - mode: hash - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6 + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col14 + Statistics: Num rows: 231917759 Data size: 31534108438 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col14 is null (type: boolean) + Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + outputColumnNames: _col4, _col5, _col6 + Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), sum(_col6) + keys: _col4 (type: int) + mode: hash + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 115958879 Data size: 15767054151 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) mode: partial2 @@ -304,7 +319,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -321,7 +336,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 6 + Reducer 7 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out index 8107a05b54..1c0539199e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out @@ -61,126 +61,132 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 15 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_49] - Limit [LIM_47] (rows=1 width=344) + Reducer 9 + File Output Operator [FS_51] + Limit [LIM_49] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_46] (rows=1 width=344) + Select Operator [SEL_48] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_45] - Select Operator [SEL_44] (rows=1 width=344) + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_47] + Select Operator [SEL_46] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_77] (rows=1 width=344) + Group By Operator [GBY_78] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_76] - Group By Operator [GBY_75] (rows=1 width=344) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_76] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_74] (rows=421645953 width=135) + Group By Operator [GBY_75] (rows=231905279 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_73] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_72] (rows=421645953 width=135) + Group By Operator [GBY_73] (rows=231905279 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_40] (rows=421645953 width=135) + Select Operator [SEL_42] (rows=231905279 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_39] (rows=421645953 width=135) - predicate:_col16 is null - Select Operator [SEL_38] (rows=843291907 width=135) - Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_81] (rows=843291907 width=135) - Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)} - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=287989836 width=135) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_24] (rows=287989836 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_70] (rows=287989836 width=135) - predicate:(cs_order_number is not null and cs_warehouse_sk is not null) - TableScan [TS_22] (rows=287989836 width=135) - default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] - <-Reducer 14 [ONE_TO_ONE_EDGE] - FORWARD [RS_36] - PartitionCols:_col0 - Select Operator [SEL_31] (rows=14399440 width=106) - Output:["_col0","_col1"] - Group By Operator [GBY_30] (rows=14399440 width=106) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_28] (rows=28798881 width=106) - Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_71] (rows=28798881 width=106) - predicate:cr_order_number is not null - TableScan [TS_25] (rows=28798881 width=106) - default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_80] (rows=383314495 width=135) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_19] + Filter Operator [FIL_41] (rows=231905279 width=135) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_83] (rows=463810558 width=135) + Conds:RS_38._col4=RS_39._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 15 [ONE_TO_ONE_EDGE] + FORWARD [RS_39] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=14399440 width=106) + Output:["_col0","_col1"] + Group By Operator [GBY_30] (rows=14399440 width=106) + Output:["_col0"],keys:KEY._col0 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Select Operator [SEL_11] (rows=30 width=2045) - Output:["_col0"] - Filter Operator [FIL_69] (rows=30 width=2045) - predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) - TableScan [TS_9] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_79] (rows=348467716 width=135) - Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_16] + Group By Operator [GBY_28] (rows=28798881 width=106) + Output:["_col0"],keys:cr_order_number + Filter Operator [FIL_72] (rows=28798881 width=106) + predicate:cr_order_number is not null + TableScan [TS_25] (rows=28798881 width=106) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_38] + PartitionCols:_col4 + Select Operator [SEL_37] (rows=421645953 width=135) + Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_82] (rows=421645953 width=135) + Conds:RS_34._col4=RS_35._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_33] (rows=287989836 width=135) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_24] (rows=287989836 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_71] (rows=287989836 width=135) + predicate:(cs_order_number is not null and cs_warehouse_sk is not null) + TableScan [TS_22] (rows=287989836 width=135) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_81] (rows=383314495 width=135) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_11] (rows=30 width=2045) Output:["_col0"] - Filter Operator [FIL_68] (rows=20000000 width=1014) - predicate:((ca_state = 'NY') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_78] (rows=316788826 width=135) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_66] (rows=287989836 width=135) - predicate:(cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_13] + Filter Operator [FIL_70] (rows=30 width=2045) + predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) + TableScan [TS_9] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=348467716 width=135) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_67] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_69] (rows=20000000 width=1014) + predicate:((ca_state = 'NY') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=316788826 width=135) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_67] (rows=287989836 width=135) + predicate:(cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_68] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out index 1c9d16b01d..e18d45b15f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out @@ -57,126 +57,132 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 15 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_49] - Limit [LIM_47] (rows=1 width=344) + Reducer 9 + File Output Operator [FS_51] + Limit [LIM_49] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_46] (rows=1 width=344) + Select Operator [SEL_48] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_45] - Select Operator [SEL_44] (rows=1 width=344) + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_47] + Select Operator [SEL_46] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_77] (rows=1 width=344) + Group By Operator [GBY_78] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_76] - Group By Operator [GBY_75] (rows=1 width=344) + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_76] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_74] (rows=210834322 width=135) + Group By Operator [GBY_75] (rows=115958879 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_73] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_74] PartitionCols:_col0 - Group By Operator [GBY_72] (rows=210834322 width=135) + Group By Operator [GBY_73] (rows=115958879 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_40] (rows=210834322 width=135) + Select Operator [SEL_42] (rows=115958879 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_39] (rows=210834322 width=135) - predicate:_col16 is null - Select Operator [SEL_38] (rows=421668645 width=135) - Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_81] (rows=421668645 width=135) - Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)} - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=144002668 width=135) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_24] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_70] (rows=144002668 width=135) - predicate:(ws_order_number is not null and ws_warehouse_sk is not null) - TableScan [TS_22] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 14 [ONE_TO_ONE_EDGE] - FORWARD [RS_36] - PartitionCols:_col0 - Select Operator [SEL_31] (rows=7199233 width=92) - Output:["_col0","_col1"] - Group By Operator [GBY_30] (rows=7199233 width=92) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_28] (rows=14398467 width=92) - Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_71] (rows=14398467 width=92) - predicate:wr_order_number is not null - TableScan [TS_25] (rows=14398467 width=92) - default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_80] (rows=191667562 width=135) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_19] + Filter Operator [FIL_41] (rows=115958879 width=135) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_83] (rows=231917759 width=135) + Conds:RS_38._col4=RS_39._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + <-Reducer 15 [ONE_TO_ONE_EDGE] + FORWARD [RS_39] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=7199233 width=92) + Output:["_col0","_col1"] + Group By Operator [GBY_30] (rows=7199233 width=92) + Output:["_col0"],keys:KEY._col0 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_29] PartitionCols:_col0 - Select Operator [SEL_11] (rows=42 width=1850) - Output:["_col0"] - Filter Operator [FIL_69] (rows=42 width=1850) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_79] (rows=174243235 width=135) - Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_16] + Group By Operator [GBY_28] (rows=14398467 width=92) + Output:["_col0"],keys:wr_order_number + Filter Operator [FIL_72] (rows=14398467 width=92) + predicate:wr_order_number is not null + TableScan [TS_25] (rows=14398467 width=92) + default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] + <-Reducer 5 [ONE_TO_ONE_EDGE] + FORWARD [RS_38] + PartitionCols:_col4 + Select Operator [SEL_37] (rows=210834322 width=135) + Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_82] (rows=210834322 width=135) + Conds:RS_34._col4=RS_35._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_33] (rows=144002668 width=135) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_24] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_71] (rows=144002668 width=135) + predicate:(ws_order_number is not null and ws_warehouse_sk is not null) + TableScan [TS_22] (rows=144002668 width=135) + default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_81] (rows=191667562 width=135) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_11] (rows=42 width=1850) Output:["_col0"] - Filter Operator [FIL_68] (rows=20000000 width=1014) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_78] (rows=158402938 width=135) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_66] (rows=144002668 width=135) - predicate:(ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_13] + Filter Operator [FIL_70] (rows=42 width=1850) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_9] (rows=84 width=1850) + default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_80] (rows=174243235 width=135) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_67] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_69] (rows=20000000 width=1014) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=158402938 width=135) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=144002668 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_67] (rows=144002668 width=135) + predicate:(ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_68] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out index d816571c3e..3764d92b3b 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -4122,6 +4122,175 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 500 +PREHOOK: query: explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: ws2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_retailprice is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: wr1 + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + residual filter predicates: {(_col7 <> _col10)} + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col10 is null (type: boolean) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null +#### A masked pattern was here #### +POSTHOOK: query: select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: drop table tnull PREHOOK: type: DROPTABLE PREHOOK: Input: default@tnull