diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 82a8cda735..754c87cc42 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9410,6 +9410,25 @@ boolean continueJoinMerge() { return true; } + boolean shouldMerge(final QBJoinTree node, final QBJoinTree target) { + boolean isNodeOuterJoin=false, isNodeSemiJoin=false, hasNodePostJoinFilters=false; + boolean isTargetOuterJoin=false, isTargetSemiJoin=false, hasTargetPostJoinFilters=false; + + isNodeOuterJoin = !node.getNoOuterJoin(); + isNodeSemiJoin= !node.getNoSemiJoin(); + hasNodePostJoinFilters = node.getPostJoinFilters().size() !=0 ; + + isTargetOuterJoin = !target.getNoOuterJoin(); + isTargetSemiJoin= !target.getNoSemiJoin(); + hasTargetPostJoinFilters = target.getPostJoinFilters().size() !=0 ; + + if((hasNodePostJoinFilters && (isNodeOuterJoin || isNodeSemiJoin)) + || (hasTargetPostJoinFilters && (isTargetOuterJoin || isTargetSemiJoin))) { + return false; + } + return true; + } + // try merge join tree from inner most source // (it was merged from outer most to inner, which could be invalid) // @@ -9446,8 +9465,7 @@ private void mergeJoinTree(QB qb) { if (prevType != null && prevType != currType) { break; } - if ((!node.getNoOuterJoin() && node.getPostJoinFilters().size() != 0) || - (!target.getNoOuterJoin() && target.getPostJoinFilters().size() != 0)) { + if(!shouldMerge(node, target)) { // Outer joins with post-filtering conditions cannot be merged break; } diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q index 86305ad664..780647d082 100644 --- a/ql/src/test/queries/clientpositive/subquery_multi.q +++ b/ql/src/test/queries/clientpositive/subquery_multi.q @@ -126,6 +126,17 @@ select count(*) from src or src.value is not null or exists(select key from src); +-- EXISTS and NOT EXISTS with non-equi predicate +explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name); +select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name); + + drop table tnull; drop table tempty; drop table part_null; diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 216bc23786..67a22c812f 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -4161,6 +4161,185 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 500 +PREHOOK: query: explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: ws1 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: ws2 + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_retailprice is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_retailprice (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: wr1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + residual filter predicates: {(_col7 <> _col10)} + Statistics: Num rows: 14 Data size: 8778 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 15 Data size: 9532 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col10 is null (type: boolean) + Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 4448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null +#### A masked pattern was here #### +POSTHOOK: query: select * from part ws1 where + exists (select * from part ws2 where ws1.p_type= ws2.p_type + and ws1.p_retailprice <> ws2.p_retailprice) + and not exists(select * from part_null wr1 where ws1.p_type = wr1.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl PREHOOK: query: drop table tnull PREHOOK: type: DROPTABLE PREHOOK: Input: default@tnull