diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index a4484ec..37ff8cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -30,12 +30,11 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -76,8 +75,13 @@ public void onMatch(RelOptRuleCall call) { final Join join = call.rel(0); RelNode leftInput = call.rel(1); RelNode rightInput = call.rel(2); + boolean genLeftNotNullPred = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT); + boolean genRightNotNullPred = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT); - if (join.getJoinType() != JoinRelType.INNER) { + // TODO: 1. NULL Safe joins shouldn't push "is not null" predicate + // Currently CBO doesn't support NULL-SAFE predicates/join + // 2. Enable it for outer join (remove != INNER check below) + if (join.getJoinType() != JoinRelType.INNER /*!genLeftNotNullPred && !genRightNotNullPred && */) { return; } @@ -101,10 +105,10 @@ public void onMatch(RelOptRuleCall call) { final RelOptCluster cluster = join.getCluster(); final RexBuilder rexBuilder = join.getCluster().getRexBuilder(); - final Map newLeftConditions = getNotNullConditions(cluster, - rexBuilder, leftInput, joinLeftKeyPositions); - final Map newRightConditions = getNotNullConditions(cluster, - rexBuilder, rightInput, joinRightKeyPositions); + final Map newLeftConditions = genLeftNotNullPred ? getNotNullConditions(cluster, + rexBuilder, leftInput, joinLeftKeyPositions) : null; + final Map newRightConditions = genRightNotNullPred ? getNotNullConditions(cluster, + rexBuilder, rightInput, joinRightKeyPositions) : null; // Nothing will be added to the expression if (newLeftConditions == null && newRightConditions == null) { @@ -112,16 +116,10 @@ public void onMatch(RelOptRuleCall call) { } if (newLeftConditions != null) { - if (leftInput instanceof HiveFilter) { - leftInput = leftInput.getInput(0); - } leftInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, leftInput, newLeftConditions.values()); } if (newRightConditions != null) { - if (rightInput instanceof HiveFilter) { - rightInput = rightInput.getInput(0); - } rightInput = createHiveFilterConjunctiveCondition(filterFactory, rexBuilder, rightInput, newRightConditions.values()); } @@ -142,12 +140,9 @@ public void onMatch(RelOptRuleCall call) { final RelDataType returnType = cluster.getTypeFactory(). createSqlType(SqlTypeName.BOOLEAN); - final Map newConditions; - if (input instanceof HiveFilter) { - newConditions = splitCondition(((HiveFilter) input).getCondition()); - } - else { - newConditions = new HashMap(); + final Map newConditions = new HashMap(); + for (RexNode node : RelMetadataQuery.getPulledUpPredicates(input).pulledUpPredicates) { + newConditions.put(node.toString(), node); } for (int pos : inputKeyPositions) { try { @@ -175,20 +170,7 @@ public void onMatch(RelOptRuleCall call) { } return newConditions; } - - private static Map splitCondition(RexNode condition) { - Map newConditions = new HashMap(); - if (condition.getKind() == SqlKind.AND) { - for (RexNode node : ((RexCall) condition).getOperands()) { - newConditions.put(node.toString(), node); - } - } - else { - newConditions.put(condition.toString(), condition); - } - return newConditions; - } - + private static RelNode createHiveFilterConjunctiveCondition(FilterFactory filterFactory, RexBuilder rexBuilder, RelNode input, Collection conditions) { final RexNode newCondition = RexUtil.composeConjunction(rexBuilder, conditions, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index dccb598..ae5e56e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -508,6 +508,9 @@ public static boolean prunePartitionNames(List partColumnNames, boolean isUnknown = (isNeeded == null); if (!isUnknown && !isNeeded) { partIter.remove(); + if (LOG.isDebugEnabled()) { + LOG.debug("skipping partition: " + partName); + } continue; } if (isUnknown && values.contains(defaultPartitionName)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 8b46d6c..a08bedd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -940,12 +940,21 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 2. Add not null filters - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); - } + // 2. Common filter extraction, and PPD + basePlan = hepPlan(basePlan, true, mdProvider, + HivePreFilteringRule.INSTANCE, + new HiveFilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, + HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), + new HiveFilterSetOpTransposeRule(HiveFilter.DEFAULT_FILTER_FACTORY), + HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, + new FilterAggregateTransposeRule(Filter.class, + HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); + + // 3. Add "IS Not NULL" predicate below Join + basePlan = hepPlan(basePlan, false, mdProvider, HiveJoinAddNotNullRule.INSTANCE); - // 3. Constant propagation, common filter extraction, and PPD + // 4. Constant propagation, common filter extraction, and PPD basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, @@ -959,19 +968,19 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - // 4. Transitive inference & Partition Pruning + // 5. Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 5. Projection Pruning + // 6. Projection Pruning HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 6. Rerun PPD through Project as column pruning would have introduced DT + // 7. Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY,