diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java index 9e1ac4d..f05c5ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java @@ -1,6 +1,7 @@ package org.apache.hadoop.hive.ql.optimizer.optiq.rules; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.ListIterator; @@ -15,6 +16,7 @@ import org.eigenbase.relopt.RelOptRuleCall; import org.eigenbase.relopt.RelOptRuleOperand; import org.eigenbase.relopt.RelOptUtil; +import org.eigenbase.relopt.RelOptUtil.InputFinder; import org.eigenbase.rex.RexBuilder; import org.eigenbase.rex.RexCall; import org.eigenbase.rex.RexNode; @@ -113,7 +115,7 @@ protected void perform(RelOptRuleCall call, FilterRelBase filter, final Holder joinTypeHolder = Holder.of(join.getJoinType()); if (RelOptUtil.classifyFilters(join, aboveFilters, join.getJoinType(), !join.getJoinType().generatesNullsOnLeft(), !join.getJoinType() - .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) { + .generatesNullsOnRight(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) { filterPushed = true; } @@ -128,7 +130,18 @@ protected void perform(RelOptRuleCall call, FilterRelBase filter, if (exp instanceof RexCall) { RexCall c = (RexCall) exp; if (c.getOperator().getKind() == SqlKind.EQUALS) { - continue; + boolean validHiveJoinFilter = true; + for (RexNode rn : c.getOperands()) { + // NOTE: Hive dis-allows projections from both left & right side + // of join condition. Example: Hive disallows + // (r1.x=r2.x)=(r1.y=r2.y) on join condition. + if (filterRefersToBothSidesOfJoin(rn, join)) { + validHiveJoinFilter = false; + break; + } + } + if (validHiveJoinFilter) + continue; } } aboveFilters.add(exp); @@ -148,7 +161,7 @@ protected void perform(RelOptRuleCall call, FilterRelBase filter, // not on the side which is preserved. if (RelOptUtil.classifyFilters(join, joinFilters, null, !join .getJoinType().generatesNullsOnRight(), !join.getJoinType() - .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, smart)) { + .generatesNullsOnLeft(), joinFilters, leftFilters, rightFilters, joinTypeHolder, false)) { filterPushed = true; } @@ -240,6 +253,25 @@ private boolean isAlwaysTrue(RexNode predicate) { } return predicate.isAlwaysTrue(); } + + private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) { + boolean refersToBothSides = false; + + int joinNoOfProjects = j.getRowType().getFieldCount(); + BitSet filterProjs = new BitSet(joinNoOfProjects); + BitSet allLeftProjs = new BitSet(joinNoOfProjects); + BitSet allRightProjs = new BitSet(joinNoOfProjects); + allLeftProjs.set(0, j.getInput(0).getRowType().getFieldCount(), true); + allRightProjs.set(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects, true); + + InputFinder inputFinder = new InputFinder(filterProjs); + filter.accept(inputFinder); + + if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs)) + refersToBothSides = true; + + return refersToBothSides; + } } // End PushFilterPastJoinRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 8c25ad2..233cb87 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -11861,6 +11861,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); RelNode rootRel = optiqPreCboPlan; + hepPlanner.setRoot(rootRel); if (!optiqPreCboPlan.getTraitSet().equals(desiredTraits)) { rootRel = hepPlanner.changeTraits(optiqPreCboPlan, desiredTraits); } diff --git a/ql/src/test/queries/clientpositive/cbo_correctness.q b/ql/src/test/queries/clientpositive/cbo_correctness.q index c3fc211..72a65e5 100644 --- a/ql/src/test/queries/clientpositive/cbo_correctness.q +++ b/ql/src/test/queries/clientpositive/cbo_correctness.q @@ -205,9 +205,9 @@ with q1 as ( select c_int from q2 where c_boolean = false), q2 as ( select c_int,c_boolean from v1 where value = '1') select sum(c_int) from (select c_int from q1) a; -with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int), -q2 as ( select c_int,c_boolean from v1 where value = '1') -select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int; +--with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int), +--q2 as ( select c_int,c_boolean from v1 where value = '1') +--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int; drop view v1; diff --git a/ql/src/test/results/clientpositive/cbo_correctness.q.out b/ql/src/test/results/clientpositive/cbo_correctness.q.out index d34cd1a..2fd47a6 100644 --- a/ql/src/test/results/clientpositive/cbo_correctness.q.out +++ b/ql/src/test/results/clientpositive/cbo_correctness.q.out @@ -7190,7 +7190,7 @@ null NULL null NULL null NULL null NULL null NULL null NULL PREHOOK: query: -- 5. Test Select + Join + FIL + TS -select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0) +select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0) PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t1@dt=2014 @@ -7198,7 +7198,7 @@ PREHOOK: Input: default@t2 PREHOOK: Input: default@t2@dt=2014 #### A masked pattern was here #### POSTHOOK: query: -- 5. Test Select + Join + FIL + TS -select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0) +select t1.c_int, t2.c_int from t1 join t2 on t1.key=t2.key where (t1.c_int + t2.c_int == 2) and (t1.c_int > 0 or t2.c_float >= 0) POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t1@dt=2014 @@ -15963,30 +15963,21 @@ POSTHOOK: Input: default@t1@dt=2014 POSTHOOK: Input: default@v1 #### A masked pattern was here #### 2 -PREHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int), -q2 as ( select c_int,c_boolean from v1 where value = '1') -select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t1@dt=2014 -PREHOOK: Input: default@v1 -PREHOOK: Input: default@v4 -#### A masked pattern was here #### -POSTHOOK: query: with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int), -q2 as ( select c_int,c_boolean from v1 where value = '1') -select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t1@dt=2014 -POSTHOOK: Input: default@v1 -POSTHOOK: Input: default@v4 -#### A masked pattern was here #### -31104 -PREHOOK: query: drop view v1 +PREHOOK: query: --with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int), +--q2 as ( select c_int,c_boolean from v1 where value = '1') +--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int; + + +drop view v1 PREHOOK: type: DROPVIEW PREHOOK: Input: default@v1 PREHOOK: Output: default@v1 -POSTHOOK: query: drop view v1 +POSTHOOK: query: --with q1 as ( select t1.c_int c_int from q2 join t1 where q2.c_int = t1.c_int), +--q2 as ( select c_int,c_boolean from v1 where value = '1') +--select count(*) from q1 join q2 join v4 on q1.c_int = q2.c_int and v4.c_int = q2.c_int; + + +drop view v1 POSTHOOK: type: DROPVIEW POSTHOOK: Input: default@v1 POSTHOOK: Output: default@v1