diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index b516925..56b96b4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -1398,6 +1398,13 @@ public static boolean isOpOr(ExprNodeDesc desc) {
   }
 
   /**
+   * Returns whether the exprNodeDesc is a node of "in".
+   */
+  public static boolean isIn(ExprNodeDesc desc) {
+    return GenericUDFIn.class == getGenericUDFClassFromExprDesc(desc);
+  }
+
+  /**
    * Returns whether the exprNodeDesc is a node of "not".
    */
   public static boolean isOpNot(ExprNodeDesc desc) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index f56cd96..55c71dd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -23,7 +23,6 @@ import java.util.Set;
 
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
 import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
@@ -83,7 +82,8 @@ public void initialize(HiveConf hiveConf) {
     }
 
     // Try to transform OR predicates in Filter into simpler IN clauses first
-    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) &&
+        !pctx.getContext().isCboSucceeded()) {
       final int min = HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
       transformations.add(new PointLookupOptimizer(min));
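For context, a minimal HiveQL sketch of the rewrite this wiring enables (hypothetical table t; not part of the patch): once at least hive.optimize.point.lookup.min disjuncts are present, a disjunction of constant equality predicates is planned as a single point-lookup IN clause.

-- hypothetical table t with an integer column key
SELECT * FROM t
WHERE key = 1 OR key = 2 OR key = 3;
-- is planned as the equivalent
SELECT * FROM t
WHERE key IN (1, 2, 3);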
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
new file mode 100644
index 0000000..9609a1e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java
@@ -0,0 +1,381 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+
+/**
+ * This optimization will take a Filter expression, and if its predicate contains
+ * an OR operator whose children are constant equality expressions, it will try
+ * to generate an IN clause (which is more efficient). If the OR operator contains
+ * AND operator children, the optimization might generate an IN clause that uses
+ * structs.
+ */
+public class HivePointLookupOptimizerRule extends RelOptRule {
+
+  protected static final Log LOG = LogFactory.getLog(HivePointLookupOptimizerRule.class);
+
+
+  // Minimum number of OR clauses needed to transform into IN clauses
+  private final int min;
+
+  public HivePointLookupOptimizerRule(int min) {
+    super(operand(Filter.class, any()));
+    this.min = min;
+  }
+
+  public void onMatch(RelOptRuleCall call) {
+    final Filter filter = call.rel(0);
+
+    final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
+
+    final RexNode condition = RexUtil.pullFactors(rexBuilder, filter.getCondition());
+
+    // 1. We try to transform possible candidates
+    RexTransformIntoInClause transformIntoInClause = new RexTransformIntoInClause(rexBuilder, filter, min);
+    RexNode newCondition = transformIntoInClause.apply(condition);
+
+    // 2. We merge IN expressions
+    RexMergeInClause mergeInClause = new RexMergeInClause(rexBuilder);
+    newCondition = mergeInClause.apply(newCondition);
+
+    // 3. If we could not transform anything, we bail out
+    if (newCondition.toString().equals(condition.toString())) {
+      return;
+    }
+
+    // 4. We create the filter with the new condition
+    RelNode newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newCondition);
+
+    call.transformTo(newFilter);
+  }
+
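A short sketch of why onMatch calls RexUtil.pullFactors before attempting the rewrite (hypothetical columns a and b; an illustration, not code from the patch): factoring the common conjunct out of the disjunction exposes the remaining OR to the IN transformation.

-- (a = 1 AND b = 2) OR (a = 1 AND b = 3)
-- is factored into
--   a = 1 AND (b = 2 OR b = 3)
-- and the inner disjunction (b = 2 OR b = 3) can then become b IN (2, 3).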
+
+  /**
+   * Transforms OR clauses into IN clauses, when possible.
+   */
+  protected static class RexTransformIntoInClause extends RexShuttle {
+    private final RexBuilder rexBuilder;
+    private final Filter filterOp;
+    private final int min;
+
+    RexTransformIntoInClause(RexBuilder rexBuilder, Filter filterOp, int min) {
+      this.filterOp = filterOp;
+      this.rexBuilder = rexBuilder;
+      this.min = min;
+    }
+
+    @Override public RexNode visitCall(RexCall call) {
+      RexNode node;
+      switch (call.getKind()) {
+        case AND:
+          ImmutableList<RexNode> operands = RexUtil.flattenAnd(((RexCall) call).getOperands());
+          List<RexNode> newOperands = new ArrayList<RexNode>();
+          for (RexNode operand: operands) {
+            RexNode newOperand;
+            if (operand.getKind() == SqlKind.OR) {
+              try {
+                newOperand = transformIntoInClauseCondition(rexBuilder,
+                        filterOp.getRowType(), operand, min);
+                if (newOperand == null) {
+                  return call;
+                }
+              } catch (SemanticException e) {
+                LOG.error("Exception in HivePointLookupOptimizerRule", e);
+                return call;
+              }
+            } else {
+              newOperand = operand;
+            }
+            newOperands.add(newOperand);
+          }
+          node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
+          break;
+        case OR:
+          try {
+            node = transformIntoInClauseCondition(rexBuilder,
+                    filterOp.getRowType(), call, min);
+            if (node == null) {
+              return call;
+            }
+          } catch (SemanticException e) {
+            LOG.error("Exception in HivePointLookupOptimizerRule", e);
+            return call;
+          }
+          break;
+        default:
+          return super.visitCall(call);
+      }
+      return node;
+    }
+
+    private static RexNode transformIntoInClauseCondition(RexBuilder rexBuilder, RelDataType inputSchema,
+            RexNode condition, int min) throws SemanticException {
+      assert condition.getKind() == SqlKind.OR;
+
+      // 1. We extract the information necessary to create the predicate for the new
+      //    filter
+      ListMultimap<RexInputRef, RexLiteral> columnConstantsMap = ArrayListMultimap.create();
+      ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
+      if (operands.size() < min) {
+        // We bail out
+        return null;
+      }
+      for (int i = 0; i < operands.size(); i++) {
+        RexNode operand = operands.get(i);
+
+        final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand);
+        final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
+
+        for (RexNode conjunction: conjunctions) {
+          // 1.1. If it is not a RexCall, we bail out
+          if (!(conjunction instanceof RexCall)) {
+            return null;
+          }
+          // 1.2. We extract the information that we need
+          RexCall conjCall = (RexCall) conjunction;
+          if (conjCall.getOperator().getKind() == SqlKind.EQUALS) {
+            if (conjCall.operands.get(0) instanceof RexInputRef &&
+                    conjCall.operands.get(1) instanceof RexLiteral) {
+              RexInputRef ref = (RexInputRef) conjCall.operands.get(0);
+              RexLiteral literal = (RexLiteral) conjCall.operands.get(1);
+              columnConstantsMap.put(ref, literal);
+              if (columnConstantsMap.get(ref).size() != i+1) {
+                // If this column already had a constant in this disjunct, or was
+                // missing from a previous disjunct, we bail out
+                return null;
+              }
+            } else if (conjCall.operands.get(1) instanceof RexInputRef &&
+                    conjCall.operands.get(0) instanceof RexLiteral) {
+              RexInputRef ref = (RexInputRef) conjCall.operands.get(1);
+              RexLiteral literal = (RexLiteral) conjCall.operands.get(0);
+              columnConstantsMap.put(ref, literal);
+              if (columnConstantsMap.get(ref).size() != i+1) {
+                // If this column already had a constant in this disjunct, or was
+                // missing from a previous disjunct, we bail out
+                return null;
+              }
+            } else {
+              // Bail out
+              return null;
+            }
+          } else {
+            return null;
+          }
+        }
+      }
+
+      // 2. We build the new predicate and return it
+      List<RexNode> newOperands = new ArrayList<RexNode>(operands.size());
+      // 2.1 Create structs
+      List<RexInputRef> columns = new ArrayList<RexInputRef>();
+      List<String> names = new ArrayList<String>();
+      ImmutableList.Builder<RelDataType> paramsTypes = ImmutableList.builder();
+      List<TypeInfo> structReturnType = new ArrayList<TypeInfo>();
+      ImmutableList.Builder<RelDataType> newOperandsTypes = ImmutableList.builder();
+      for (int i = 0; i < operands.size(); i++) {
+        List<RexLiteral> constantFields = new ArrayList<RexLiteral>(operands.size());
+
+        for (RexInputRef ref : columnConstantsMap.keySet()) {
+          // If any of the elements was not referenced by every operand, we bail out
+          if (columnConstantsMap.get(ref).size() <= i) {
+            return null;
+          }
+          RexLiteral columnConstant = columnConstantsMap.get(ref).get(i);
+          if (i == 0) {
+            columns.add(ref);
+            names.add(inputSchema.getFieldNames().get(ref.getIndex()));
+            paramsTypes.add(ref.getType());
+            structReturnType.add(TypeConverter.convert(ref.getType()));
+          }
+          constantFields.add(columnConstant);
+        }
+
+        if (i == 0) {
+          RexNode columnsRefs;
+          if (columns.size() == 1) {
+            columnsRefs = columns.get(0);
+          } else {
+            // Create STRUCT clause
+            columnsRefs = rexBuilder.makeCall(SqlStdOperatorTable.ROW, columns);
+          }
+          newOperands.add(columnsRefs);
+          newOperandsTypes.add(columnsRefs.getType());
+        }
+        RexNode values;
+        if (constantFields.size() == 1) {
+          values = constantFields.get(0);
+        } else {
+          // Create STRUCT clause
+          values = rexBuilder.makeCall(SqlStdOperatorTable.ROW, constantFields);
+        }
+        newOperands.add(values);
+        newOperandsTypes.add(values.getType());
+      }
+
+      // 3. Create and return IN clause
+      return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands);
+    }
+
+  }
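A sketch of the struct case handled by transformIntoInClauseCondition (hypothetical columns a and b; the literal rendering in real plans may differ): when every disjunct equates the same set of columns to constants, the columns and constants are wrapped in ROW/struct calls.

-- (a = 1 AND b = 'x') OR (a = 2 AND b = 'y')
-- is rewritten to
--   struct(a, b) IN (struct(1, 'x'), struct(2, 'y'))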
+
+  /**
+   * Merge IN clauses, when possible.
+   */
+  protected static class RexMergeInClause extends RexShuttle {
+    private final RexBuilder rexBuilder;
+
+    RexMergeInClause(RexBuilder rexBuilder) {
+      this.rexBuilder = rexBuilder;
+    }
+
+    @Override public RexNode visitCall(RexCall call) {
+      RexNode node;
+      final List<RexNode> operands;
+      final List<RexNode> newOperands;
+      Map<String, RexNode> stringToExpr = Maps.newHashMap();
+      Multimap<String, String> inLHSExprToRHSExprs = LinkedHashMultimap.create();
+      switch (call.getKind()) {
+        case AND:
+          // IN clauses need to be combined by keeping only common elements
+          operands = Lists.newArrayList(RexUtil.flattenAnd(((RexCall) call).getOperands()));
+          for (int i = 0; i < operands.size(); i++) {
+            RexNode operand = operands.get(i);
+            if (operand.getKind() == SqlKind.IN) {
+              RexCall inCall = (RexCall) operand;
+              if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+                continue;
+              }
+              String ref = inCall.getOperands().get(0).toString();
+              stringToExpr.put(ref, inCall.getOperands().get(0));
+              if (inLHSExprToRHSExprs.containsKey(ref)) {
+                Set<String> expressions = Sets.newHashSet();
+                for (int j = 1; j < inCall.getOperands().size(); j++) {
+                  String expr = inCall.getOperands().get(j).toString();
+                  expressions.add(expr);
+                  stringToExpr.put(expr, inCall.getOperands().get(j));
+                }
+                inLHSExprToRHSExprs.get(ref).retainAll(expressions);
+              } else {
+                for (int j = 1; j < inCall.getOperands().size(); j++) {
+                  String expr = inCall.getOperands().get(j).toString();
+                  inLHSExprToRHSExprs.put(ref, expr);
+                  stringToExpr.put(expr, inCall.getOperands().get(j));
+                }
+              }
+              operands.remove(i);
+              --i;
+            }
+          }
+          // Create IN clauses
+          newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs);
+          newOperands.addAll(operands);
+          // Return node
+          node = RexUtil.composeConjunction(rexBuilder, newOperands, false);
+          break;
+        case OR:
+          // IN clauses need to be combined by keeping all elements
+          operands = Lists.newArrayList(RexUtil.flattenOr(((RexCall) call).getOperands()));
+          for (int i = 0; i < operands.size(); i++) {
+            RexNode operand = operands.get(i);
+            if (operand.getKind() == SqlKind.IN) {
+              RexCall inCall = (RexCall) operand;
+              if (!HiveCalciteUtil.isDeterministic(inCall.getOperands().get(0))) {
+                continue;
+              }
+              String ref = inCall.getOperands().get(0).toString();
+              stringToExpr.put(ref, inCall.getOperands().get(0));
+              for (int j = 1; j < inCall.getOperands().size(); j++) {
+                String expr = inCall.getOperands().get(j).toString();
+                inLHSExprToRHSExprs.put(ref, expr);
+                stringToExpr.put(expr, inCall.getOperands().get(j));
+              }
+              operands.remove(i);
+              --i;
+            }
+          }
+          // Create IN clauses
+          newOperands = createInClauses(rexBuilder, stringToExpr, inLHSExprToRHSExprs);
+          newOperands.addAll(operands);
+          // Return node
+          node = RexUtil.composeDisjunction(rexBuilder, newOperands, false);
+          break;
+        default:
+          return super.visitCall(call);
+      }
+      return node;
+    }
+
+    private static List<RexNode> createInClauses(RexBuilder rexBuilder, Map<String, RexNode> stringToExpr,
+            Multimap<String, String> inLHSExprToRHSExprs) {
+      List<RexNode> newExpressions = Lists.newArrayList();
+      for (Entry<String, Collection<String>> entry : inLHSExprToRHSExprs.asMap().entrySet()) {
+        String ref = entry.getKey();
+        Collection<String> exprs = entry.getValue();
+        if (exprs.isEmpty()) {
+          newExpressions.add(rexBuilder.makeLiteral(false));
+        } else {
+          List<RexNode> newOperands = new ArrayList<RexNode>(exprs.size() + 1);
+          newOperands.add(stringToExpr.get(ref));
+          for (String expr : exprs) {
+            newOperands.add(stringToExpr.get(expr));
+          }
+          newExpressions.add(rexBuilder.makeCall(HiveIn.INSTANCE, newOperands));
+        }
+      }
+      return newExpressions;
+    }
+
+  }
+
+}
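A sketch of the merge semantics implemented by RexMergeInClause (hypothetical column key): under AND, the right-hand sides of IN clauses with the same left-hand side are intersected; under OR they are unioned; an empty intersection folds to false via makeLiteral(false).

-- key IN (1, 2) AND key IN (2, 3)  ~>  key IN (2)
-- key IN (1) OR key IN (2, 3)      ~>  key IN (1, 2, 3)
-- key IN (1) AND key IN (2)        ~>  false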
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index 9cc9ea9..9911179 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -25,8 +25,6 @@ import java.util.Map;
 import java.util.Stack;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -49,13 +47,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Expression processor factory for partition condition removing. Each processor tries to
@@ -368,50 +365,66 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           return getResultWrapFromResults(results, fd, newNodeOutputs);
         }
         return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
-    } else if (fd.getGenericUDF() instanceof GenericUDFIn) {
-      List<ExprNodeDesc> children = fd.getChildren();
-      boolean removePredElem = false;
-      ExprNodeDesc lhs = children.get(0);
-
-      if (lhs instanceof ExprNodeGenericFuncDesc) {
-        // Make sure that the generic udf is deterministic
-        if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
-            .getGenericUDF())) {
-          boolean hasOnlyPartCols = true;
-          boolean hasDynamicListDesc = false;
-
-          for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
-            // Check if the current field expression contains only
-            // partition column or a virtual column or constants.
-            // If yes, this filter predicate is a candidate for this optimization.
-            if (!(ed instanceof ExprNodeColumnDesc &&
-                ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
-              hasOnlyPartCols = false;
-              break;
-            }
-          }
+    } else if (FunctionRegistry.isIn(fd)) {
+      List<ExprNodeDesc> children = fd.getChildren();
+      boolean removePredElem = false;
+      ExprNodeDesc lhs = children.get(0);
+
+      if (lhs instanceof ExprNodeColumnDesc) {
+        // It is an IN clause on a column
+        if (((ExprNodeColumnDesc)lhs).getIsPartitionColOrVirtualCol()) {
+          // It is a partition column, we can proceed
+          removePredElem = true;
+        }
+        if (removePredElem) {
+          // We should not remove the dynamic partition pruner generated synthetic predicates.
+          for (int i = 1; i < children.size(); i++) {
+            if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+              removePredElem = false;
+              break;
+            }
+          }
+        }
+      } else if (lhs instanceof ExprNodeGenericFuncDesc) {
+        // It is an IN clause on a struct
+        // Make sure that the generic udf is deterministic
+        if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
+            .getGenericUDF())) {
+          boolean hasOnlyPartCols = true;
+          boolean hasDynamicListDesc = false;
+
+          for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
+            // Check if the current field expression contains only
+            // partition column or a virtual column or constants.
+            // If yes, this filter predicate is a candidate for this optimization.
+            if (!(ed instanceof ExprNodeColumnDesc &&
+                ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) {
+              hasOnlyPartCols = false;
+              break;
+            }
+          }
 
-          // If we have non-partition columns, we cannot remove the predicate.
-          if (hasOnlyPartCols) {
-            // We should not remove the dynamic partition pruner generated synthetic predicates.
-            for (int i = 1; i < children.size(); i++) {
-              if (children.get(i) instanceof ExprNodeDynamicListDesc) {
-                hasDynamicListDesc = true;
-                break;
-              }
-            }
+          // If we have non-partition columns, we cannot remove the predicate.
+          if (hasOnlyPartCols) {
+            // We should not remove the dynamic partition pruner generated synthetic predicates.
+            for (int i = 1; i < children.size(); i++) {
+              if (children.get(i) instanceof ExprNodeDynamicListDesc) {
+                hasDynamicListDesc = true;
+                break;
+              }
+            }
 
-            removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
-          }
+            removePredElem = hasOnlyPartCols && !hasDynamicListDesc;
+          }
         }
+      }
 
-      // If removePredElem is set to true, return true as this is a potential candidate
-      // for partition condition remover. Else, set the WalkState for this node to unknown.
-      return removePredElem ?
-          new NodeInfoWrapper(WalkState.TRUE, null,
-              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
-          new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
+      // If removePredElem is set to true, return true as this is a potential candidate
+      // for partition condition remover. Else, set the WalkState for this node to unknown.
+      return removePredElem ?
+          new NodeInfoWrapper(WalkState.TRUE, null,
+              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
+          new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ;
     } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
       // If it's a non-deterministic UDF, set unknown to true
       return new NodeInfoWrapper(WalkState.UNKNOWN, null,
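A sketch of the partition-condition-remover behavior the new branch enables, assuming a hypothetical table p partitioned by ds: once partition pruning has selected exactly the listed partitions, an IN predicate on the partition column is redundant and folds to TRUE, so the Filter Operator disappears from the scan (compare the pointlookup2.q.out changes below).

SELECT key, value
FROM p
WHERE ds IN ('2000-04-08', '2000-04-09');
-- after pruning to ds=2000-04-08 and ds=2000-04-09, no Filter Operator is needed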
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fd2246b..b59347d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -153,6 +153,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
@@ -1138,23 +1139,32 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProvider
 
     // 3. Run exhaustive PPD, add not null filters, transitive inference,
     // constant propagation, constant folding
+    List<RelOptRule> rules = Lists.newArrayList();
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING)) {
+      rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING);
+    } else {
+      rules.add(HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC);
+    }
+    rules.add(HiveFilterSetOpTransposeRule.INSTANCE);
+    rules.add(HiveFilterSortTransposeRule.INSTANCE);
+    rules.add(HiveFilterJoinRule.JOIN);
+    rules.add(HiveFilterJoinRule.FILTER_ON_JOIN);
+    rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class));
+    rules.add(new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY));
+    rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
+    rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
+    rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
+      final int min = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+      rules.add(new HivePointLookupOptimizerRule(min));
+    }
+    rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
+    rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
+    rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
+    rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
     basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP,
-        conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ? HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING
-            : HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC,
-        HiveFilterSetOpTransposeRule.INSTANCE,
-        HiveFilterSortTransposeRule.INSTANCE,
-        HiveFilterJoinRule.JOIN,
-        HiveFilterJoinRule.FILTER_ON_JOIN,
-        new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class),
-        new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY),
-        HiveReduceExpressionsRule.PROJECT_INSTANCE,
-        HiveReduceExpressionsRule.FILTER_INSTANCE,
-        HiveReduceExpressionsRule.JOIN_INSTANCE,
-        HiveJoinAddNotNullRule.INSTANCE_JOIN,
-        HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN,
-        HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN,
-        HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
+        rules.toArray(new RelOptRule[rules.size()]));
     perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
       "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
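For reference, the knobs exercised by the new tests below: hive.optimize.point.lookup.min appears verbatim in the tests, while the boolean property name is inferred from ConfVars.HIVEPOINTLOOKUPOPTIMIZER and should be treated as an assumption here.

-- enable the OR-to-IN rewrite and require at least 2 disjuncts before it fires
set hive.optimize.point.lookup=true;      -- assumed property name
set hive.optimize.point.lookup.min=2;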
diff --git ql/src/test/queries/clientpositive/auto_join19_inclause.q ql/src/test/queries/clientpositive/auto_join19_inclause.q
new file mode 100644
index 0000000..7773289
--- /dev/null
+++ ql/src/test/queries/clientpositive/auto_join19_inclause.q
@@ -0,0 +1,18 @@
+set hive.mapred.mode=nonstrict;
+set hive.auto.convert.join = true;
+set hive.optimize.point.lookup.min=2;
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE;
+
+explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11');
+
+
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11');
+
+
+SELECT sum(hash(dest1.key,dest1.value)) FROM dest1;
diff --git ql/src/test/queries/clientpositive/filter_in_or_dup.q ql/src/test/queries/clientpositive/filter_in_or_dup.q
new file mode 100644
index 0000000..34a5139
--- /dev/null
+++ ql/src/test/queries/clientpositive/filter_in_or_dup.q
@@ -0,0 +1,19 @@
+set hive.optimize.point.lookup.min=2;
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2');
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3');
+
+EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2');
diff --git ql/src/test/results/clientpositive/auto_join19_inclause.q.out ql/src/test/results/clientpositive/auto_join19_inclause.q.out
new file mode 100644
index 0000000..3f70055
--- /dev/null
+++ ql/src/test/results/clientpositive/auto_join19_inclause.q.out
@@ -0,0 +1,130 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-4 depends on stages: Stage-5
+  Stage-0 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_1:src2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_1:src2 
+          TableScan
+            alias: src2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                Map Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                  outputColumnNames: _col0, _col4
+                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int), _col4 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.dest1
+      Local Work:
+        Map Reduce Local Work
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
+INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
+where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+407444119660
diff --git ql/src/test/results/clientpositive/filter_in_or_dup.q.out ql/src/test/results/clientpositive/filter_in_or_dup.q.out
new file mode 100644
index 0000000..f863ac3
--- /dev/null
+++ ql/src/test/results/clientpositive/filter_in_or_dup.q.out
@@ -0,0 +1,96 @@
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key = '2')
+AND f.key IN ('1', '2', '3')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key
+FROM cbo_t1 f
+WHERE (f.key = '1' OR f.key='2' OR f.key='3')
+AND f.key IN ('1', '2')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: f
+          Statistics: Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (key) IN ('1', '2') (type: boolean)
+            Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 10 Data size: 131 Basic stats: COMPLETE Column stats: NONE
+              ListSink
+
diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out
index fb17e72..869e4cd 100644
--- ql/src/test/results/clientpositive/pointlookup2.q.out
+++ ql/src/test/results/clientpositive/pointlookup2.q.out
@@ -985,21 +985,17 @@ STAGE PLANS:
           alias: t1
           Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int), value (type: string), ds (type: string)
-              outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                null sort order: 
-                sort order: 
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
-                tag: 0
-                value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
-                auto parallelism: false
+          Select Operator
+            expressions: key (type: int), value (type: string), ds (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+              auto parallelism: false
           TableScan
             alias: t2
             Statistics: Num rows: 1 Data size: 18 Basic stats: COMPLETE Column stats: NONE
@@ -1169,11 +1165,11 @@ STAGE PLANS:
               0 
               1 
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
@@ -1201,7 +1197,7 @@ STAGE PLANS:
               key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
              auto parallelism: false
@@ -1235,13 +1231,13 @@ STAGE PLANS:
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-          Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/pointlookup3.q.out ql/src/test/results/clientpositive/pointlookup3.q.out
index d5c4157..e98ba76 100644
--- ql/src/test/results/clientpositive/pointlookup3.q.out
+++ ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -129,7 +129,7 @@ STAGE PLANS:
           GatherStats: false
           Filter Operator
             isSamplingPred: false
-            predicate: (struct(ds1,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+            predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
             Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
@@ -374,14 +374,14 @@ STAGE PLANS:
           GatherStats: false
           Filter Operator
             isSamplingPred: false
-            predicate: (key = 1) (type: boolean)
+            predicate: (struct(key,ds1)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
             Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: value (type: string), ds1 (type: string)
-              outputColumnNames: _col1, _col2
+              expressions: key (type: int), value (type: string), ds1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: 1 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string)
+                key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), '2001-04-08' (type: string)
                 null sort order: aaaa
                 sort order: ++++
                 Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
@@ -441,7 +441,7 @@ STAGE PLANS:
       Needs Tagging: false
       Reduce Operator Tree:
         Select Operator
-          expressions: 1 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), '2001-04-08' (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
@@ -1149,21 +1149,17 @@ STAGE PLANS:
           alias: t1
           Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
           GatherStats: false
-          Filter Operator
-            isSamplingPred: false
-            predicate: (ds1) IN ('2000-04-08', '2000-04-09') (type: boolean)
-            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
-              outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                null sort order: 
-                sort order: 
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
-                tag: 0
-                value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
-                auto parallelism: false
+          Select Operator
+            expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+              tag: 0
+              value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              auto parallelism: false
           TableScan
             alias: t1
             Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE
@@ -1337,11 +1333,11 @@ STAGE PLANS:
               0 
               1 
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-            Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               isSamplingPred: false
-              predicate: (struct(_col4,_col2)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
-              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+              predicate: (struct(_col2,_col4)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
@@ -1369,7 +1365,7 @@ STAGE PLANS:
               key expressions: _col4 (type: int), _col5 (type: string), _col2 (type: string)
               null sort order: aaa
               sort order: +++
-              Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
               tag: -1
               value expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: string)
               auto parallelism: false
@@ -1403,13 +1399,13 @@ STAGE PLANS:
         Select Operator
           expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string), KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: string), VALUE._col4 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-          Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
             GlobalTableId: 0
 #### A masked pattern was here ####
             NumFilesPerFileSink: 1
-            Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/pointlookup4.q.out ql/src/test/results/clientpositive/pointlookup4.q.out
index 0a9bd3e..6236272 100644
--- ql/src/test/results/clientpositive/pointlookup4.q.out
+++ ql/src/test/results/clientpositive/pointlookup4.q.out
@@ -384,7 +384,7 @@ STAGE PLANS:
           GatherStats: false
           Filter Operator
             isSamplingPred: false
-            predicate: (struct(ds1,key,ds2)) IN (const struct('2000-04-08',1,'2001-04-08'), const struct('2000-04-09',2,'2001-04-09')) (type: boolean)
+            predicate: (struct(key,ds1,ds2)) IN (const struct(1,'2000-04-08','2001-04-08'), const struct(2,'2000-04-09','2001-04-09')) (type: boolean)
             Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds1 (type: string), ds2 (type: string)