diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveOrToInClauseRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveOrToInClauseRule.java
new file mode 100644
index 0000000..e44b549
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveOrToInClauseRule.java
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ListMultimap;
+
+
+public class HiveOrToInClauseRule extends RelOptRule {
+
+  protected static final Log LOG = LogFactory
+      .getLog(HiveOrToInClauseRule.class.getName());
+
+
+  public static final HiveOrToInClauseRule INSTANCE =
+      new HiveOrToInClauseRule();
+
+
+  private HiveOrToInClauseRule() {
+    super(operand(Filter.class, any()));
+  }
+
+  public void onMatch(RelOptRuleCall call) {
+    final Filter filter = call.rel(0);
+
+    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
+
+    final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+
+    // 1. We try to transform possible candidates
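+    //    Two shapes are handled:
+    //    - a top-level OR, which we try to fold into a single IN call;
+    //    - an AND, where each OR-shaped conjunct is folded independently and the
+    //      remaining conjuncts are kept as they are.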
+    RexNode newCondition;
+    switch (condition.getKind()) {
+      case AND:
+        ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) condition).getOperands());
+        List<RexNode> newOperands = new ArrayList<RexNode>();
+        for (RexNode operand : operands) {
+          RexNode newOperand;
+          if (operand.getKind() == SqlKind.OR) {
+            try {
+              newOperand = transformIntoInClauseCondition(rexBuilder,
+                      filter.getRowType(), operand);
+              if (newOperand == null) {
+                return;
+              }
+            } catch (SemanticException e) {
+              LOG.error("Exception in HiveOrToInClauseRule", e);
+              return;
+            }
+          } else {
+            newOperand = operand;
+          }
+          newOperands.add(newOperand);
+        }
+        newCondition = RexUtil.composeConjunction(rexBuilder, newOperands, false);
+        break;
+      case OR:
+        try {
+          newCondition = transformIntoInClauseCondition(rexBuilder,
+                  filter.getRowType(), condition);
+          if (newCondition == null) {
+            return;
+          }
+        } catch (SemanticException e) {
+          LOG.error("Exception in HiveOrToInClauseRule", e);
+          return;
+        }
+        break;
+      default:
+        return;
+    }
+
+    // 2. If we could not transform anything, we bail out
+    if (newCondition.toString().equals(condition.toString())) {
+      return;
+    }
+
+    // 3. We create the filter with the new condition
+    RelNode newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newCondition);
+
+    call.transformTo(newFilter);
+
+  }
+
+  private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema,
+          RexNode condition) throws SemanticException {
+    assert condition.getKind() == SqlKind.OR;
+
+    // 1. We extract the information necessary to create the predicate for the new
+    //    filter
+    ListMultimap<RexInputRef, RexLiteral> columnConstantsMap = ArrayListMultimap.create();
+    ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
+    for (int i = 0; i < operands.size(); i++) {
+      RexNode operand = operands.get(i);
+
+      final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand);
+      final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
+
+      for (RexNode conjunction : conjunctions) {
+        // 1.1. If it is not a RexCall, we bail out
+        if (!(conjunction instanceof RexCall)) {
+          return null;
+        }
+        // 1.2. We extract the information that we need
+        RexCall conjCall = (RexCall) conjunction;
+        if (conjCall.getOperator().getKind() == SqlKind.EQUALS) {
+          if (conjCall.operands.get(0) instanceof RexInputRef &&
+                  conjCall.operands.get(1) instanceof RexLiteral) {
+            RexInputRef ref = (RexInputRef) conjCall.operands.get(0);
+            RexLiteral literal = (RexLiteral) conjCall.operands.get(1);
+            columnConstantsMap.put(ref, literal);
+            if (columnConstantsMap.get(ref).size() != i+1) {
+              // If the column was not referenced exactly once per disjunct so far, we bail out
+              return null;
+            }
+          } else if (conjCall.operands.get(1) instanceof RexInputRef &&
+                  conjCall.operands.get(0) instanceof RexLiteral) {
+            RexInputRef ref = (RexInputRef) conjCall.operands.get(1);
+            RexLiteral literal = (RexLiteral) conjCall.operands.get(0);
+            columnConstantsMap.put(ref, literal);
+            if (columnConstantsMap.get(ref).size() != i+1) {
+              // If the column was not referenced exactly once per disjunct so far, we bail out
+              return null;
+            }
+          } else {
+            // Bail out
+            return null;
+          }
+        } else {
+          return null;
+        }
+      }
+    }
+
+    // 2. We build the new predicate and return it
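+    //    The IN call is built positionally: its first operand is the column
+    //    reference (or a ROW of column references in the multi-column case),
+    //    followed by one constant (or ROW of constants) per original disjunct.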
+    List<RexNode> newOperands = new ArrayList<RexNode>(operands.size());
+    // 2.1 Create structs
+    List<RexNode> columns = new ArrayList<RexNode>();
+    List<String> names = new ArrayList<String>();
+    ImmutableList.Builder<RelDataType> paramsTypes = ImmutableList.builder();
+    List<TypeInfo> structReturnType = new ArrayList<TypeInfo>();
+    ImmutableList.Builder<RelDataType> newOperandsTypes = ImmutableList.builder();
+    for (int i = 0; i < operands.size(); i++) {
+      List<RexNode> constantFields = new ArrayList<RexNode>(operands.size());
+
+      for (RexInputRef ref : columnConstantsMap.keySet()) {
+        // If any of the columns was not referenced by every disjunct, we bail out
+        if (columnConstantsMap.get(ref).size() <= i) {
+          return null;
+        }
+        RexLiteral columnConstant = columnConstantsMap.get(ref).get(i);
+        if (i == 0) {
+          columns.add(ref);
+          names.add(inputSchema.getFieldNames().get(ref.getIndex()));
+          paramsTypes.add(ref.getType());
+          structReturnType.add(TypeConverter.convert(ref.getType()));
+        }
+        constantFields.add(columnConstant);
+      }
+
+      if (i == 0) {
+        RexNode columnsRefs;
+        if (columns.size() == 1) {
+          columnsRefs = columns.get(0);
+        } else {
+          // Create STRUCT clause
+          columnsRefs = rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns);
+        }
+        newOperands.add(columnsRefs);
+        newOperandsTypes.add(columnsRefs.getType());
+      }
+      RexNode values;
+      if (constantFields.size() == 1) {
+        values = constantFields.get(0);
+      } else {
+        // Create STRUCT clause
+        values = rexBuilder.makeCall(SqlStdOperatorTable.ROW, constantFields);
+      }
+      newOperands.add(values);
+      newOperandsTypes.add(values.getType());
+    }
+
+    // 3. Create and return IN clause
+    return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c36aa9d..88ed0db 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -153,6 +153,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveOrToInClauseRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
@@ -1152,6 +1153,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
         HiveReduceExpressionsRule.PROJECT_INSTANCE,
         HiveReduceExpressionsRule.FILTER_INSTANCE,
         HiveReduceExpressionsRule.JOIN_INSTANCE,
+        HiveOrToInClauseRule.INSTANCE,
         HiveJoinAddNotNullRule.INSTANCE_JOIN,
         HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN,
         HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN,
diff --git ql/src/test/queries/clientpositive/filter_in_or_dup.q ql/src/test/queries/clientpositive/filter_in_or_dup.q
new file mode 100644
index 0000000..c512b92
--- /dev/null
+++ ql/src/test/queries/clientpositive/filter_in_or_dup.q
@@ -0,0 +1,6 @@
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2');
+
diff --git ql/src/test/results/clientpositive/filter_in_or_dup.q.out ql/src/test/results/clientpositive/filter_in_or_dup.q.out
new file mode 100644
index 0000000..57d5bc8
--- /dev/null
+++ ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -0,0 +1,32 @@
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              ListSink
+
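For reference, the committed test only exercises the single-column path, where the existing IN predicate subsumes the rewritten OR. A multi-column disjunction such as the following would also exercise the ROW/STRUCT branch of transformIntoInClauseCondition (an illustrative sketch only; it assumes cbo_t1's usual value column, and the corresponding q.out would have to be generated by the test driver):

EXPLAIN
SELECT f.key
FROM cbo_t1 f
WHERE (f.key = '1' AND f.value = 'a')
   OR (f.key = '2' AND f.value = 'b');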