diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java index ec72047..cf03a4f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java @@ -95,36 +95,40 @@ public void onMatch(RelOptRuleCall call) { * not equi join conditions. */ @Override - protected void validateJoinFilters(List aboveFilters, - List joinFilters, JoinRelBase join, JoinRelType joinType) { - if (joinType.equals(JoinRelType.INNER)) { - ListIterator filterIter = joinFilters.listIterator(); - while (filterIter.hasNext()) { - RexNode exp = filterIter.next(); - if (exp instanceof RexCall) { - RexCall c = (RexCall) exp; - if (c.getOperator().getKind() == SqlKind.EQUALS) { - boolean validHiveJoinFilter = true; - for (RexNode rn : c.getOperands()) { - // NOTE: Hive dis-allows projections from both left - // & - // right side - // of join condition. Example: Hive disallows - // (r1.x=r2.x)=(r1.y=r2.y) on join condition. - if (filterRefersToBothSidesOfJoin(rn, join)) { - validHiveJoinFilter = false; - break; - } - } - if (validHiveJoinFilter) - continue; - } - } - aboveFilters.add(exp); - filterIter.remove(); - } - } - } + protected void validateJoinFilters(List aboveFilters, List joinFilters, + JoinRelBase join, JoinRelType joinType) { + if (joinType.equals(JoinRelType.INNER)) { + ListIterator filterIter = joinFilters.listIterator(); + while (filterIter.hasNext()) { + RexNode exp = filterIter.next(); + if (exp instanceof RexCall) { + RexCall c = (RexCall) exp; + if ((c.getOperator().getKind() == SqlKind.EQUALS) + || (c.getOperator().getKind() == SqlKind.LESS_THAN) + || (c.getOperator().getKind() == SqlKind.GREATER_THAN) + || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL) + || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) { + boolean validHiveJoinFilter = true; + for (RexNode rn : c.getOperands()) { + // NOTE: Hive dis-allows projections from both left + // & + // right side + // of join condition. Example: Hive disallows + // (r1.x=r2.x)=(r1.y=r2.y) on join condition. + if (filterRefersToBothSidesOfJoin(rn, join)) { + validHiveJoinFilter = false; + break; + } + } + if (validHiveJoinFilter) + continue; + } + } + aboveFilters.add(exp); + filterIter.remove(); + } + } + } private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) { boolean refersToBothSides = false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java index ba07363..28bf2ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.optimizer.optiq.stats; import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel; @@ -32,6 +35,10 @@ import org.eigenbase.rex.RexNode; import org.eigenbase.rex.RexVisitorImpl; import org.eigenbase.sql.SqlKind; +import org.eigenbase.sql.SqlOperator; +import org.eigenbase.sql.type.SqlTypeUtil; + +import com.google.common.collect.Sets; public class FilterSelectivityEstimator extends RexVisitorImpl { private final RelNode childRel; @@ -61,7 +68,7 @@ public Double visitCall(RexCall call) { } Double selectivity = null; - SqlKind op = call.getKind(); + SqlKind op = getOp(call); switch (op) { case AND: { @@ -74,6 +81,7 @@ public Double visitCall(RexCall call) { break; } + case NOT: case NOT_EQUALS: { selectivity = computeNotEqualitySelectivity(call); break; @@ -88,7 +96,16 @@ public Double visitCall(RexCall call) { } case IN: { - selectivity = ((double) 1 / ((double) call.operands.size())); + // TODO: 1) check for duplicates 2) We assume in clause values to be + // present in NDV which may not be correct (Range check can find it) 3) We + // assume values in NDV set is uniformly distributed over col values + // (account for skewness - histogram). + selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1); + if (selectivity <= 0.0) { + selectivity = 0.10; + } else if (selectivity >= 1.0) { + selectivity = 1.0; + } break; } @@ -152,18 +169,19 @@ private Double computeDisjunctionSelectivity(RexCall call) { } tmpCardinality = childCardinality * tmpSelectivity; - if (tmpCardinality > 1) + if (tmpCardinality > 1 && tmpCardinality < childCardinality) { tmpSelectivity = (1 - tmpCardinality / childCardinality); - else + } else { tmpSelectivity = 1.0; + } selectivity *= tmpSelectivity; } - if (selectivity > 1) - return (1 - selectivity); - else - return 1.0; + if (selectivity < 0.0) + selectivity = 0.0; + + return (1 - selectivity); } /** @@ -225,4 +243,19 @@ private boolean isPartitionPredicate(RexNode expr, RelNode r) { } return false; } + + private SqlKind getOp(RexCall call) { + SqlKind op = call.getKind(); + + if (call.getKind().equals(SqlKind.OTHER_FUNCTION) + && SqlTypeUtil.inBooleanFamily(call.getType())) { + SqlOperator sqlOp = call.getOperator(); + String opName = (sqlOp != null) ? sqlOp.getName() : ""; + if (opName.equalsIgnoreCase("in")) { + op = SqlKind.IN; + } + } + + return op; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java index c6efff6..7f52c29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java @@ -278,6 +278,7 @@ private static String getName(GenericUDF hiveUDF) { registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, hToken(HiveParser.GREATERTHANOREQUALTO, ">=")); registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); + registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>")); } private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {