diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index dde6288..f548951 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -19,6 +19,8 @@
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Set;
@@ -30,8 +32,8 @@
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.RelFactories.FilterFactory;
-import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
@@ -39,10 +41,15 @@
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperator;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -52,8 +59,10 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.LinkedHashMultimap;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Sets;
@@ -79,6 +88,8 @@
           GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(),
           GenericUDFOPLessThan.class.getAnnotation(Description.class).name(),
           GenericUDFOPNotEqual.class.getAnnotation(Description.class).name());
+  private static final String EQUAL_UDF =
+          GenericUDFOPEqual.class.getAnnotation(Description.class).name();
   private static final String IN_UDF =
           GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String BETWEEN_UDF =
@@ -95,12 +106,6 @@ public void onMatch(RelOptRuleCall call) {
     final Filter filter = call.rel(0);
     final RelNode filterChild = call.rel(1);
 
-    // 0. If the filter is already on top of a TableScan,
-    //    we can bail out
-    if (filterChild instanceof TableScan) {
-      return;
-    }
-
     final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
     final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
 
@@ -112,12 +117,22 @@ public void onMatch(RelOptRuleCall call) {
         ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) condition).getOperands());
         for (RexNode operand: operands) {
           if (operand.getKind() == SqlKind.OR) {
-            commonOperands.addAll(extractCommonOperands(rexBuilder,operand));
+            try {
+              commonOperands.addAll(extractCommonOperands(rexBuilder,operand));
+            } catch (SemanticException e) {
+              LOG.error("Exception in HivePreFilteringRule", e);
+              return;
+            }
           }
         }
         break;
       case OR:
-        commonOperands = extractCommonOperands(rexBuilder,condition);
+        try {
+          commonOperands = extractCommonOperands(rexBuilder,condition);
+        } catch (SemanticException e) {
+          LOG.error("Exception in HivePreFilteringRule", e);
+          return;
+        }
         break;
       default:
         return;
@@ -159,9 +174,9 @@ public void onMatch(RelOptRuleCall call) {
 
   }
 
-  private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition) {
+  private static List<RexNode> extractCommonOperands(RexBuilder rexBuilder, RexNode condition) throws SemanticException {
     assert condition.getKind() == SqlKind.OR;
-    Multimap<String,RexNode> reductionCondition = LinkedHashMultimap.create();
+    Multimap<RexNode,RexNode> reductionCondition = LinkedHashMultimap.create();
 
     // 1. We extract the information necessary to create the predicate for the new
     //    filter; currently we support comparison functions, in and between
@@ -179,21 +194,21 @@ public void onMatch(RelOptRuleCall call) {
         if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) {
           if (conjCall.operands.get(0) instanceof RexInputRef &&
                   conjCall.operands.get(1) instanceof RexLiteral) {
-            reductionCondition.put(conjCall.operands.get(0).toString(),
+            reductionCondition.put(conjCall.operands.get(0),
                     conjCall);
             addedToReductionCondition = true;
           } else if (conjCall.operands.get(1) instanceof RexInputRef &&
                   conjCall.operands.get(0) instanceof RexLiteral) {
-            reductionCondition.put(conjCall.operands.get(1).toString(),
+            reductionCondition.put(conjCall.operands.get(1),
                     conjCall);
             addedToReductionCondition = true;
           }
         } else if(conjCall.getOperator().getName().equals(IN_UDF)) {
-          reductionCondition.put(conjCall.operands.get(0).toString(),
+          reductionCondition.put(conjCall.operands.get(0),
                   conjCall);
           addedToReductionCondition = true;
         } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) {
-          reductionCondition.put(conjCall.operands.get(1).toString(),
+          reductionCondition.put(conjCall.operands.get(1),
                   conjCall);
           addedToReductionCondition = true;
         }
@@ -207,12 +222,110 @@ public void onMatch(RelOptRuleCall call) {
 
     // 2. We gather the common factors and return them
     List<RexNode> commonOperands = new ArrayList<>();
-    for (Entry<String,Collection<RexNode>> pair : reductionCondition.asMap().entrySet()) {
+    for (Entry<RexNode,Collection<RexNode>> pair : reductionCondition.asMap().entrySet()) {
       if (pair.getValue().size() == operands.size()) {
-        commonOperands.add(RexUtil.composeDisjunction(rexBuilder, pair.getValue(), false));
+        commonOperands.add(composeNewPredicate(rexBuilder, pair.getKey(), pair.getValue(), false));
       }
     }
     return commonOperands;
   }
 
+  /**
+   * Builds the predicate for a single input reference out of the conditions
+   * that were collected for it, one per branch of the original disjunction.
+   * Equality comparisons between an input reference and a literal, and IN calls
+   * whose values are all literals, are merged into a single IN call over the
+   * distinct literals; any other condition is kept as is and OR-ed with it.
+   *
+   * @param rexBuilder builder used to create the new expressions
+   * @param inputRef expression the conditions refer to; it becomes the first
+   *          operand of the generated IN call
+   * @param nodes conditions gathered for {@code inputRef}; each must be a RexCall
+   * @param nullOnEmpty not used by this method
+   * @return the new predicate; the literal false if {@code nodes} is empty
+   * @throws SemanticException if raised while resolving the IN function or
+   *           converting it into a Calcite operator
+   */
+  private static RexNode composeNewPredicate(RexBuilder rexBuilder, RexNode inputRef,
+          Collection<RexNode> nodes, boolean nullOnEmpty) throws SemanticException {
+    // Empty set: nothing to OR together, so the predicate is always false
+    if (nodes.isEmpty()) {
+      return rexBuilder.makeLiteral(false);
+    }
+
+    List<RexNode> inOperands = new ArrayList<RexNode>();
+    Set<RexLiteral> literals = new HashSet<RexLiteral>();
+    Builder<RelDataType> argTypeBldr = ImmutableList.<RelDataType> builder();
+    // Adding the input reference
+    inOperands.add(inputRef);
+    argTypeBldr.add(inputRef.getType());
+    // Other conditions that will not go in the IN clause
+    // NOTE(review): HashSet iteration order is unspecified, so the operand order
+    // of the resulting OR may vary; confirm whether that matters for plan stability
+    Set<RexNode> otherConditions = new HashSet<RexNode>();
+
+    Iterator<RexNode> nodesIterator = nodes.iterator();
+    while (nodesIterator.hasNext()) {
+      RexCall call = (RexCall) nodesIterator.next();
+      if(call.getOperator().getName().equals(EQUAL_UDF)) {
+        if (call.operands.get(0) instanceof RexInputRef &&
+                call.operands.get(1) instanceof RexLiteral) {
+          RexLiteral literal = (RexLiteral) call.operands.get(1);
+          if (literals.add(literal)) {
+            inOperands.add(literal);
+            argTypeBldr.add(literal.getType());
+          }
+        } else if (call.operands.get(1) instanceof RexInputRef &&
+                call.operands.get(0) instanceof RexLiteral) {
+          RexLiteral literal = (RexLiteral) call.operands.get(0);
+          if (literals.add(literal)) {
+            inOperands.add(literal);
+            argTypeBldr.add(literal.getType());
+          }
+        } else {
+          // Unexpected shape: keep the condition rather than silently dropping
+          // one branch of the disjunction
+          otherConditions.add(call);
+        }
+      } else if(call.getOperator().getName().equals(IN_UDF)) {
+        // Only a value list made up exclusively of literals can be merged
+        boolean onlyLiterals = call.operands.size() > 1;
+        for (int i = 1; onlyLiterals && i < call.operands.size(); i++) {
+          onlyLiterals = call.operands.get(i) instanceof RexLiteral;
+        }
+        if (!onlyLiterals) {
+          otherConditions.add(call);
+          continue;
+        }
+        for (int i = 1; i < call.operands.size(); i++) {
+          RexLiteral literal = (RexLiteral) call.operands.get(i);
+          if (literals.add(literal)) {
+            inOperands.add(literal);
+            argTypeBldr.add(literal.getType());
+          }
+        }
+      } else {
+        otherConditions.add(call);
+      }
+    }
+
+    RexNode inClause = null;
+    if (!literals.isEmpty()) {
+      RelDataType retType = TypeConverter.convert(
+              TypeInfoFactory.booleanTypeInfo, rexBuilder.getTypeFactory());
+      SqlOperator inOp = SqlFunctionConverter.getCalciteOperator(
+              IN_UDF, FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(),
+              argTypeBldr.build(), retType);
+      inClause = rexBuilder.makeCall(inOp, inOperands);
+    }
+
+    if (!otherConditions.isEmpty()) {
+      if (inClause != null) {
+        otherConditions.add(inClause);
+      }
+      return RexUtil.composeDisjunction(rexBuilder, otherConditions, false);
+    }
+    return inClause;
+  }
+
 }