diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8a00079..15f6da5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1192,8 +1192,6 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
         "Whether to transform OR clauses in Filter operators into IN clauses"),
     HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
         "Minimum number of OR clauses needed to transform into IN clauses"),
-    HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
-        "Extract partial expressions when optimizing point lookup IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 439f616..7dc8ad2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -73,11 +73,7 @@ public void initialize(HiveConf hiveConf) {
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
       final int min = HiveConf.getIntVar(hiveConf,
           HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
-      final boolean extract = HiveConf.getBoolVar(hiveConf,
-          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
-      final boolean testMode = HiveConf.getBoolVar(hiveConf,
-          HiveConf.ConfVars.HIVE_IN_TEST);
-      transformations.add(new PointLookupOptimizer(min, extract, testMode));
+      transformations.add(new PointLookupOptimizer(min));
     }
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
@@ -95,6 +91,7 @@ public void initialize(HiveConf hiveConf) {
     }
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
+      transformations.add(new PartitionColumnsSeparator());
       transformations.add(new PartitionPruner());
       transformations.add(new PartitionConditionRemover());
       if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java
new file mode 100644
index 0000000..e84b37c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java
@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.TypeRule;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+/**
+ * This optimization takes a Filter expression and, if its predicate contains
+ * an IN operator whose children are constant structs or structs containing constant fields,
+ * tries to generate a separate predicate with IN clauses containing only the partition
+ * columns. That predicate is in turn used by the partition pruner to prune partitions,
+ * while the original IN(STRUCT(..)..) predicate is left in place to do the filtering.
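+ *
+ * Example: if T1 is partitioned by (A, B), T2 by C and T3 by D, then a predicate like
+ *   (STRUCT(T1.A, T1.B, T2.C, T3.D)) IN (const STRUCT(1, 2, 3, 4), ...)
+ * yields the pruning predicate
+ *   (STRUCT(T1.A, T1.B)) IN (const STRUCT(1, 2), ...)
+ *     AND (STRUCT(T2.C)) IN (const STRUCT(3), ...)
+ *     AND (STRUCT(T3.D)) IN (const STRUCT(4), ...)
+ */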
+public class PartitionColumnsSeparator implements Transform {
+
+  private static final Log LOG = LogFactory.getLog(PartitionColumnsSeparator.class);
+  private static final String IN_UDF =
+    GenericUDFIn.class.getAnnotation(Description.class).name();
+  private static final String STRUCT_UDF =
+    GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+    GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    // 1. Trigger transformation
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
+        new StructInTransformer());
+
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+    GraphWalker ogw = new ForwardWalker(disp);
+
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    return pctx;
+  }
+
+  private class StructInTransformer implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator filterOp = (FilterOperator) nd;
+      ExprNodeDesc predicate = filterOp.getConf().getPredicate();
+
+      // Generate the partition pruning predicate as separate IN clauses
+      // containing only the partition columns.
+      ExprNodeDesc newPredicate = generateInClauses(predicate);
+      if (newPredicate != null) {
+        // Attach the partition pruning predicate to the current FIL
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Generated new predicate with IN clause: " + newPredicate);
+        }
+        filterOp.getConf().setPartitionpruningPredicate(newPredicate);
+      }
+
+      return null;
+    }
+
+    private ExprNodeDesc generateInClauses(ExprNodeDesc predicate) throws SemanticException {
+      Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+      exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new StructInExprProcessor());
+
+      // The dispatcher fires the processor corresponding to the closest matching
+      // rule and passes the context along
+      Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null);
+      GraphWalker egw = new PreOrderOnceWalker(disp);
+
+      List<Node> startNodes = new ArrayList<Node>();
+      startNodes.add(predicate);
+
+      HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+      egw.startWalking(startNodes, outputMap);
+      return (ExprNodeDesc) outputMap.get(predicate);
+    }
+  }
+
+  private class StructInExprProcessor implements NodeProcessor {
+
+    // Caches, for each visited expression node, whether the expression consists
+    // only of partition columns, virtual columns and constants.
+    private Map<ExprNodeDesc, Boolean> exprNodeToPartOrVirtualColExpr =
+        new IdentityHashMap<ExprNodeDesc, Boolean>();
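+
+    /**
+     * Returns true if the given expression consists exclusively of partition
+     * columns, virtual columns and constants. For a table partitioned by ds,
+     * 'ds' and '2000-04-08' qualify, while 'key + 2' does not, because key is
+     * a regular column. Results are memoized per expression node.
+     */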
+    private boolean exprContainsOnlyPartitionColOrVirtualColOrConstants(ExprNodeDesc en) {
+      if (en == null) {
+        return true;
+      }
+      if (exprNodeToPartOrVirtualColExpr.containsKey(en)) {
+        return exprNodeToPartOrVirtualColExpr.get(en);
+      }
+      if (en instanceof ExprNodeColumnDesc) {
+        boolean ret = ((ExprNodeColumnDesc)en).getIsPartitionColOrVirtualCol();
+        exprNodeToPartOrVirtualColExpr.put(en, ret);
+        return ret;
+      }
+      if (en instanceof ExprNodeConstantDesc || en.getChildren() == null) {
+        exprNodeToPartOrVirtualColExpr.put(en, true);
+        return true;
+      }
+      for (ExprNodeDesc cn : en.getChildren()) {
+        if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(cn)) {
+          exprNodeToPartOrVirtualColExpr.put(en, false);
+          return false;
+        }
+      }
+      exprNodeToPartOrVirtualColExpr.put(en, true);
+      return true;
+    }
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+
+      /*
+       * BEGIN : Early terminations for Partition Column Separator
+       */
+      // 1. If it is not an IN operator, we bail out.
+      if (!(fd.getGenericUDF() instanceof GenericUDFIn)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd + ", not an IN operator");
+        }
+        return null;
+      }
+
+      // 2. It is an IN operator with struct children
+      List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < 2 || !(children.get(0) instanceof ExprNodeGenericFuncDesc) ||
+          (!(((ExprNodeGenericFuncDesc) children.get(0)).getGenericUDF()
+          instanceof GenericUDFStruct))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd + ", children size " +
+              children.size() + ", child expression : " + children.get(0).getExprString());
+        }
+        return null;
+      }
+
+      // 3. See if there are partition columns in the struct; if not, bail out.
+      boolean isValidOptimization = false;
+      for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) children.get(0)).getChildren()) {
+        // Check if the current field expression contains only
+        // partition columns, virtual columns or constants.
+        // If yes, this filter predicate is a candidate for this optimization.
+        if (!(ed instanceof ExprNodeConstantDesc) &&
+            exprContainsOnlyPartitionColOrVirtualColOrConstants(ed)) {
+          isValidOptimization = true;
+          break;
+        }
+      }
+      if (!isValidOptimization) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd +
+              ", there are no partition columns in struct fields");
+        }
+        return null;
+      }
+      /*
+       * END : Early terminations for Partition Column Separator
+       */
+
+      Map<String, List<ExprNodeDesc>> tableAliasToExprNodeDesc =
+          new HashMap<String, List<ExprNodeDesc>>();
+      Map<String, List<String>> tableAliasToPartName = new HashMap<String, List<String>>();
+      Map<String, List<TypeInfo>> tableAliasToTypeInfo = new HashMap<String, List<TypeInfo>>();
+      ExprNodeGenericFuncDesc originalStructDesc = ((ExprNodeGenericFuncDesc) children.get(0));
+      List<ExprNodeDesc> originalDescChildren = originalStructDesc.getChildren();
+
+      // Set the first row of the IN clauses, which is the struct field metadata.
+      for (ExprNodeDesc en : originalDescChildren) {
+        if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+          List<ExprNodeDesc> exprNodeDescList;
+          List<String> partNameList;
+          List<TypeInfo> typeInfoList;
+          String tabAlias = en instanceof ExprNodeColumnDesc ?
+              ((ExprNodeColumnDesc)en).getTabAlias() : en.getExprString();
+
+          if (!tableAliasToExprNodeDesc.containsKey(tabAlias)) {
+            exprNodeDescList = new ArrayList<ExprNodeDesc>();
+            exprNodeDescList.add(en);
+            tableAliasToExprNodeDesc.put(tabAlias, exprNodeDescList);
+
+            partNameList = new ArrayList<String>();
+            partNameList.add(en.getName());
+            tableAliasToPartName.put(tabAlias, partNameList);
+
+            typeInfoList = new ArrayList<TypeInfo>();
+            typeInfoList.add(en.getTypeInfo());
+            tableAliasToTypeInfo.put(tabAlias, typeInfoList);
+          } else {
+            exprNodeDescList = tableAliasToExprNodeDesc.get(tabAlias);
+            exprNodeDescList.add(en);
+
+            partNameList = tableAliasToPartName.get(tabAlias);
+            partNameList.add(en.getName());
+
+            typeInfoList = tableAliasToTypeInfo.get(tabAlias);
+            typeInfoList.add(en.getTypeInfo());
+          }
+        }
+      }
+
+      Map<String, List<ExprNodeDesc>> tableAliasToInStruct =
+          new HashMap<String, List<ExprNodeDesc>>();
+
+      for (Map.Entry<String, List<ExprNodeDesc>> entry : tableAliasToExprNodeDesc.entrySet()) {
+        String currTabAlias = entry.getKey();
+        List<ExprNodeDesc> currStructExprList = new ArrayList<ExprNodeDesc>();
+        currStructExprList.add(new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.getStructTypeInfo(tableAliasToPartName.get(currTabAlias),
+            tableAliasToTypeInfo.get(currTabAlias)),
+            FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+            entry.getValue()));
+        tableAliasToInStruct.put(currTabAlias, currStructExprList);
+      }
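+
+      // At this point the struct fields are regrouped per table alias:
+      // for STRUCT(T1.A, T1.B, T2.C) the map holds
+      // { T1 -> [STRUCT(T1.A, T1.B)], T2 -> [STRUCT(T2.C)] }.
+      // The loop below extends each list with one constant struct per row
+      // of the IN clause.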
+
+      /** BEGIN FOR LOOP : Convert each row of the IN list to a list of structs. */
+      for (int i = 1; i < children.size(); i++) {
+        // The children better be either constant structs or generic struct UDFs
+        // containing constant values, otherwise we cannot do this optimization.
+        if (!(children.get(i) instanceof ExprNodeConstantDesc ||
+            (children.get(i) instanceof ExprNodeGenericFuncDesc &&
+            ((ExprNodeGenericFuncDesc) children.get(i)).
+            getGenericUDF() instanceof GenericUDFStruct))) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Partition columns not separated for " + fd +
+                ", child not a constant struct or generic struct UDF " +
+                children.get(i).getExprString());
+          }
+          return null;
+        }
+
+        Map<String, List<ExprNodeDesc>> tabAliasToConstPartColumns =
+            new HashMap<String, List<ExprNodeDesc>>();
+        List<ExprNodeDesc> constPartColumns = null;
+
+        // Case 1 : The struct is a constant struct, in which case the value should be
+        // a list of field values.
+        if (children.get(i) instanceof ExprNodeConstantDesc) {
+          // Sanity check. If the constant values do not match the metadata of the struct,
+          // return null.
+          if (!(((ExprNodeConstantDesc) (children.get(i))).getValue() instanceof List) ||
+              ((List<?>)((ExprNodeConstantDesc) (children.get(i))).
+              getValue()).size() != originalDescChildren.size()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Partition columns not separated for " + fd + ", " +
+                  children.get(i) + ", object value is : " +
+                  ((ExprNodeConstantDesc) (children.get(i))).getValue());
+            }
+            return null;
+          }
+          List<?> cnCols = (List<?>)(((ExprNodeConstantDesc)
+              (children.get(i))).getValue());
+
+          // For each field in the struct, add it to the constant partition columns
+          // or constant non-partition columns.
+          for (int j = 0; j < originalDescChildren.size(); j++) {
+            ExprNodeDesc en = originalDescChildren.get(j);
+
+            if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+              String currTabAlias = en instanceof ExprNodeColumnDesc ?
+                  ((ExprNodeColumnDesc)en).getTabAlias() : en.getExprString();
+
+              if (currTabAlias == null) {
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug("Partition columns not separated for " + fd + ", " +
+                      children.get(i) + ", internal error for unknown table name for column : " +
+                      j);
+                }
+                return null;
+              }
+              if (!tabAliasToConstPartColumns.containsKey(currTabAlias)) {
+                constPartColumns = new ArrayList<ExprNodeDesc>();
+                constPartColumns.add(new ExprNodeConstantDesc(cnCols.get(j)));
+                tabAliasToConstPartColumns.put(currTabAlias, constPartColumns);
+              } else {
+                constPartColumns = tabAliasToConstPartColumns.get(currTabAlias);
+                constPartColumns.add(new ExprNodeConstantDesc(cnCols.get(j)));
+              }
+            }
+          }
+        } else {
+          List<ExprNodeDesc> cnChildren = ((ExprNodeGenericFuncDesc) children.get(i)).
+              getChildren();
+          // Case 2 : The struct is a generic struct UDF, in which case the number of
+          // children should match the number of struct fields, and each child should
+          // be a constant node.
+          if (cnChildren.size() != originalDescChildren.size()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Partition columns not separated for " + fd + ", " +
+                  " child struct size does not match with parent struct size " +
+                  cnChildren.size() + ", " + originalDescChildren.size());
+            }
+            return null;
+          }
+
+          // For each field of the struct in the current row, add it to the corresponding struct.
+          for (int j = 0; j < originalDescChildren.size(); j++) {
+            // The fields of the struct better be constant nodes, otherwise bail out.
+            if (!(cnChildren.get(j) instanceof ExprNodeConstantDesc)) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Partition columns not separated for " + fd + ", " +
+                    " struct field not a constant type for " + cnChildren.get(j).getExprString());
+              }
+              return null;
+            }
+            ExprNodeDesc en = originalDescChildren.get(j);
+
+            if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+              String currTabAlias = en instanceof ExprNodeColumnDesc ?
+                  ((ExprNodeColumnDesc)en).getTabAlias() : en.getExprString();
+              if (!tabAliasToConstPartColumns.containsKey(currTabAlias)) {
+                constPartColumns = new ArrayList<ExprNodeDesc>();
+                constPartColumns.add(cnChildren.get(j));
+                tabAliasToConstPartColumns.put(currTabAlias, constPartColumns);
+              } else {
+                constPartColumns = tabAliasToConstPartColumns.get(currTabAlias);
+                constPartColumns.add(cnChildren.get(j));
+              }
+            }
+          }
+        }
+
+        // Convert the current row into structs.
+        // Consider T1 partitioned by A, B; T2 partitioned by C; T3 partitioned by D,
+        // i.e. : STRUCT(T1.A, T1.B, T2.C, T3.D) => {STRUCT(T1.A, T1.B), STRUCT(T2.C), STRUCT(T3.D)}
+        for (Map.Entry<String, List<ExprNodeDesc>> entry : tabAliasToConstPartColumns.entrySet()) {
+          String currTabAlias = entry.getKey();
+          List<ExprNodeDesc> currExprNodeList = tableAliasToInStruct.get(currTabAlias);
+
+          currExprNodeList.add(new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.getStructTypeInfo(tableAliasToPartName.get(currTabAlias),
+              tableAliasToTypeInfo.get(currTabAlias)),
+              FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+              entry.getValue()));
+        }
+      }
+      /** END FOR LOOP : Convert each row of the IN list to a list of structs. */
+
+      // Sanity check before we return, to avoid any errors.
+      if (tableAliasToInStruct.size() == 0) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd +
+              ", internal error, zero IN clauses generated");
+        }
+        return null;
+      }
+
+      final List<ExprNodeDesc> subExpr =
+          new ArrayList<ExprNodeDesc>(originalDescChildren.size() + 1);
+
+      for (Map.Entry<String, List<ExprNodeDesc>> entry : tableAliasToInStruct.entrySet()) {
+        subExpr.add(new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo, FunctionRegistry.
+            getFunctionInfo(IN_UDF).getGenericUDF(), entry.getValue()));
+      }
+
+      // If there is only one table alias, return its IN clause directly
+      if (subExpr.size() == 1) {
+        // Return the new expression containing only partition columns
+        return subExpr.get(0);
+      }
+      // Return the new expression containing only partition columns,
+      // after concatenating the per-table IN clauses with the AND operator
+      return new ExprNodeGenericFuncDesc(
+          TypeInfoFactory.booleanTypeInfo,
+          FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+    }
+  }
+}
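To see the separation logic outside of Hive's expression classes, here is a small self-contained sketch in plain Java, with hypothetical field and row data; class and variable names are illustrative, not Hive API. It mirrors the roles of tableAliasToExprNodeDesc and tableAliasToInStruct above: struct fields are bucketed by table alias, and each row of the IN list is split into one constant tuple per alias.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.StringJoiner;

public class StructInSeparationSketch {

  public static void main(String[] args) {
    // Fields of the original IN struct and their owning table aliases:
    // STRUCT(T1.A, T1.B, T2.C) -- hypothetical partition columns.
    String[] aliases = {"T1", "T1", "T2"};
    String[] fields = {"A", "B", "C"};

    // Rows of the IN list: STRUCT(1, 2, 3) and STRUCT(4, 5, 6).
    Object[][] rows = {{1, 2, 3}, {4, 5, 6}};

    // Step 1: bucket field positions by table alias, preserving field order
    // (mirrors tableAliasToExprNodeDesc).
    Map<String, List<Integer>> aliasToFieldIdx = new LinkedHashMap<>();
    for (int i = 0; i < aliases.length; i++) {
      aliasToFieldIdx.computeIfAbsent(aliases[i], a -> new ArrayList<>()).add(i);
    }

    // Step 2: for each alias, project its columns out of every row and
    // emit a per-alias IN clause (mirrors tableAliasToInStruct).
    List<String> inClauses = new ArrayList<>();
    for (Map.Entry<String, List<Integer>> e : aliasToFieldIdx.entrySet()) {
      StringJoiner cols = new StringJoiner(", ", "struct(", ")");
      for (int idx : e.getValue()) {
        cols.add(e.getKey() + "." + fields[idx]);
      }
      StringJoiner tuples = new StringJoiner(", ");
      for (Object[] row : rows) {
        StringJoiner tuple = new StringJoiner(", ", "const struct(", ")");
        for (int idx : e.getValue()) {
          tuple.add(String.valueOf(row[idx]));
        }
        tuples.add(tuple.toString());
      }
      inClauses.add("(" + cols + ") IN (" + tuples + ")");
    }

    // Step 3: AND the per-alias clauses together, as the separator does. Prints:
    // (struct(T1.A, T1.B)) IN (const struct(1, 2), const struct(4, 5)) AND
    // (struct(T2.C)) IN (const struct(3), const struct(6))
    System.out.println(String.join(" AND ", inClauses));
  }
}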
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index d83636d..289d775 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -18,14 +18,10 @@ package org.apache.hadoop.hive.ql.optimizer;
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Stack;
 
 import org.apache.calcite.util.Pair;
@@ -50,18 +46,15 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.ListMultimap;
 
 /**
@@ -78,48 +71,14 @@
       GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String STRUCT_UDF =
       GenericUDFStruct.class.getAnnotation(Description.class).name();
-  private static final String AND_UDF =
-      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
-
   // these are closure-bound for all the walkers in context
   public final int minOrExpr;
-  public final boolean extract;
-  public final boolean testMode;
 
   /*
    * Pass in configs and pre-create a parse context
    */
-  public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
+  public PointLookupOptimizer(final int min) {
     this.minOrExpr = min;
-    this.extract = extract;
-    this.testMode = testMode;
-  }
-
-  // Hash Set iteration isn't ordered, but force string sorted order
-  // to get a consistent test run.
-  private Collection<ExprNodeDescEqualityWrapper> sortForTests(
-      Set<ExprNodeDescEqualityWrapper> valuesExpr) {
-    if (!testMode) {
-      // normal case - sorting is wasted for an IN()
-      return valuesExpr;
-    }
-    final Collection<ExprNodeDescEqualityWrapper> sortedValues;
-
-    sortedValues = ImmutableSortedSet.copyOf(
-        new Comparator<ExprNodeDescEqualityWrapper>() {
-          @Override
-          public int compare(ExprNodeDescEqualityWrapper w1,
-              ExprNodeDescEqualityWrapper w2) {
-            // fail if you find nulls (this is a test-code section)
-            if (w1.equals(w2)) {
-              return 0;
-            }
-            return w1.getExprNodeDesc().getExprString()
-                .compareTo(w2.getExprNodeDesc().getExprString());
-          }
-        }, valuesExpr);
-
-    return sortedValues;
   }
 
   @Override
@@ -152,9 +111,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       if (LOG.isDebugEnabled()) {
         LOG.debug("Generated new predicate with IN clause: " + newPredicate);
       }
-      if (!extract) {
-        filterOp.getConf().setOrigPredicate(predicate);
-      }
       filterOp.getConf().setPredicate(newPredicate);
     }
@@ -326,50 +282,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
           FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
 
-      if (extract && columns.size() > 1) {
-        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size() + 1);
-
-        // extract pre-conditions for the tuple expressions
-        // (a,b) IN ((1,2),(2,3)) ->
-        //          ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
-
-        for (String keyString : columnConstantsMap.keySet()) {
-          final Set<ExprNodeDescEqualityWrapper> valuesExpr =
-              new HashSet<ExprNodeDescEqualityWrapper>(children.size());
-          final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial =
-              columnConstantsMap.get(keyString);
-          for (int i = 0; i < children.size(); i++) {
-            Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
-                .get(i);
-            valuesExpr
-                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
-          }
-          ExprNodeColumnDesc lookupCol = partial.get(0).left;
-          // generate a partial IN clause, if the column is a partition column
-          if (lookupCol.getIsPartitionColOrVirtualCol()
-              || valuesExpr.size() < children.size()) {
-            // optimize only nDV reductions
-            final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
-            inExpr.add(lookupCol);
-            for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
-              inExpr.add(value.getExprNodeDesc());
-            }
-            subExpr.add(new ExprNodeGenericFuncDesc(
-                TypeInfoFactory.booleanTypeInfo, FunctionRegistry
-                    .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
-          }
-        }
-        // loop complete, inspect the sub expressions generated
-        if (subExpr.size() > 0) {
-          // add the newPredicate to the end & produce an AND clause
-          subExpr.add(newPredicate);
-          newPredicate = new ExprNodeGenericFuncDesc(
-              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
-                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
-        }
-        // else, newPredicate is unmodified
-      }
-
       return newPredicate;
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
index 7262164..77b9093 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
@@ -55,8 +55,8 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
       TableScanOperator top) throws SemanticException, UDFArgumentException {
     OpWalkerCtx owc = (OpWalkerCtx) procCtx;
     // Otherwise this is not a sampling predicate and we need to
-    ExprNodeDesc predicate = fop.getConf().getOrigPredicate();
-    predicate = predicate == null ? fop.getConf().getPredicate() : predicate;
+    ExprNodeDesc predicate = fop.getConf().getPartitionpruningPredicate() != null ?
+        fop.getConf().getPartitionpruningPredicate() : fop.getConf().getPredicate();
     String alias = top.getConf().getAlias();
 
     // Generate the partition pruning predicate
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
index 6a31689..7e24d07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
@@ -79,7 +79,7 @@ public String toString() {
   private static final long serialVersionUID = 1L;
   private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate;
-  private transient ExprNodeDesc origPredicate;
+  private transient ExprNodeDesc partitionpruningPredicate;
   private boolean isSamplingPred;
   private transient SampleDesc sampleDescr;
   //Is this a filter that should perform a comparison for sorted searches
@@ -151,12 +151,12 @@ public void setSortedFilter(boolean isSortedFilter) {
     this.isSortedFilter = isSortedFilter;
   }
 
-  public void setOrigPredicate(ExprNodeDesc origPredicate) {
-    this.origPredicate = origPredicate;
+  public void setPartitionpruningPredicate(ExprNodeDesc partprunePred) {
+    this.partitionpruningPredicate = partprunePred;
   }
 
-  public ExprNodeDesc getOrigPredicate() {
-    return origPredicate;
+  public ExprNodeDesc getPartitionpruningPredicate() {
+    return partitionpruningPredicate;
   }
 
   /**
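The FilterDesc change above replaces the implicit origPredicate side channel with an explicitly named one. A minimal sketch of the resulting contract, using hypothetical stand-in types rather than the real FilterDesc/ExprNodeDesc classes: the partition pruner prefers the separated predicate when one was attached, and falls back to the full filter predicate otherwise.

// Standalone sketch; String stands in for ExprNodeDesc and MiniFilterDesc is
// a hypothetical miniature of FilterDesc, showing only the new contract.
public class FilterDescContractSketch {

  static class MiniFilterDesc {
    private String predicate;                 // full filter predicate
    private String partitionpruningPredicate; // partition-only IN clauses, may be null

    MiniFilterDesc(String predicate) { this.predicate = predicate; }

    void setPartitionpruningPredicate(String p) { this.partitionpruningPredicate = p; }

    // Mirrors the new selection logic in OpProcFactory.generatePredicate().
    String predicateForPruning() {
      return partitionpruningPredicate != null ? partitionpruningPredicate : predicate;
    }
  }

  public static void main(String[] args) {
    MiniFilterDesc fil = new MiniFilterDesc(
        "(struct(ds, key)) IN (const struct('2000-04-08', 1), const struct('2000-04-09', 2))");
    System.out.println(fil.predicateForPruning()); // full predicate: nothing separated yet

    // After PartitionColumnsSeparator runs, only the partition column remains.
    fil.setPartitionpruningPredicate(
        "(struct(ds)) IN (const struct('2000-04-08'), const struct('2000-04-09'))");
    System.out.println(fil.predicateForPruning()); // separated pruning predicate
  }
}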
diff --git a/ql/src/test/queries/clientpositive/pcs.q b/ql/src/test/queries/clientpositive/pcs.q
new file mode 100644
index 0000000..2df6966
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pcs.q
@@ -0,0 +1,28 @@
+drop table pcs_t1;
+drop table pcs_t2;
+
+create table pcs_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+set hive.optimize.point.lookup = true;
+set hive.optimize.point.lookup.min = 1;
+
+explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds;
+select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds;
+
+set hive.optimize.point.lookup = false;
+set hive.optimize.ppd=true;
+
+explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+
+explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4));
+select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4));
+
+explain extended select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08'));
+select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08'));
+
+explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2));
diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
index 1aef2ef..dbc610c 100644
--- a/ql/src/test/queries/clientpositive/pointlookup.q
+++ b/ql/src/test/queries/clientpositive/pointlookup.q
@@ -18,8 +18,7 @@ WHERE
 
 set hive.optimize.point.lookup.min=3;
-set hive.optimize.point.lookup.extract=false;
-
+set hive.optimize.ppd=false;
 explain
 SELECT key
 FROM src
@@ -38,8 +37,7 @@ WHERE
          AND value = '3'))
 ;
 
-set hive.optimize.point.lookup.extract=true;
-
+set hive.optimize.ppd=true;
 explain
 SELECT key
 FROM src
diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
index 31bebbb..8194147 100644
--- a/ql/src/test/queries/clientpositive/pointlookup2.q
+++ b/ql/src/test/queries/clientpositive/pointlookup2.q
@@ -14,7 +14,7 @@ from pcr_t1
 insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
 
 set hive.optimize.point.lookup.min=2;
-set hive.optimize.point.lookup.extract=true;
+set hive.optimize.ppd=true;
 explain extended
 select key, value, ds
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
new file mode 100644
index 0000000..f12170c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -0,0 +1,1077 @@
+PREHOOK: query: drop table pcs_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcs_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcs_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcs_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcs_t1
+POSTHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcs_t1
+PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcs_t1@ds=2000-04-08
+POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcs_t1
PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 1 + and + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A 
masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2 val_2 2000-04-09 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + 2 + TOK_FUNCTION + struct + '2000-04-08' + 3 + TOK_FUNCTION + struct + '2000-04-09' + 4 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns 
key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + 2))) IN (const struct('2000-04-08',3), const struct('2000-04-09',4)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain extended select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcs_t1 + a + TOK_TABREF + TOK_TABNAME + pcs_t1 + b + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + a + ds + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + ds + TOK_FUNCTION + struct + '2000-04-08' + 1 + '2000-04-09' + TOK_FUNCTION + struct + '2000-04-09' + 2 + '2000-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: string) + auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: int), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + 
columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [$hdt$_0:a, $hdt$_1:a] + /pcs_t1/ds=2000-04-09 [$hdt$_0:a, $hdt$_1:a] + /pcs_t1/ds=2000-04-10 [$hdt$_0:a, $hdt$_1:a] + Needs Tagging: true + Reduce Operator Tree: + 
Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (struct(_col1,_col0,_col3)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean) + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +2000-04-09 9 +2000-04-09 8 +2000-04-09 5 +2000-04-09 5 +2000-04-09 5 +2000-04-09 4 +2000-04-09 2 +2000-04-09 19 +2000-04-09 18 +2000-04-09 18 +2000-04-09 17 +2000-04-09 15 +2000-04-09 15 +2000-04-09 12 +2000-04-09 12 +2000-04-09 11 +2000-04-09 10 +2000-04-09 0 +2000-04-09 0 +2000-04-09 0 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + 
Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + key))) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: 
Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out index 7e19be4..b84d1cc 100644 --- a/ql/src/test/results/clientpositive/pointlookup.q.out +++ b/ql/src/test/results/clientpositive/pointlookup.q.out @@ -44,7 +44,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3'))) (type: boolean) + predicate: ((((key = '0') or (key = '1') or (key = '2') or (key = '3') or (key = '4') or (key = '5') or (key = '6') or (key = '7') or (key = '8') or (key = '9') or (key = '10')) and ((value = '8') or (value = '5') or (value = '6') or (value = '1') or (value = '3'))) and (((key = '0') and (value = '8')) or ((key = '1') and (value = '5')) or ((key = '2') and (value = '6')) or ((key = '3') and (value = '8')) or ((key = '4') and (value = '1')) or ((key = '5') and (value = '6')) or ((key = '6') and (value = '1')) or ((key = '7') and (value = '1')) or ((key = '8') and (value = '1')) or ((key = '9') and (value = '1')) or ((key = '10') and (value = '3')))) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -110,19 +110,26 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (((key = '0') or (key = '1') or (key = '2') or (key = '3') or (key = '4') or (key = '5') or (key = '6') or (key = '7') or (key = '8') or (key = '9') or (key = '10')) and ((value = '8') or (value = '5') or (value = '6') or (value = '1') or (value = '3'))) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1) IN ('1', '3', '5', '6', '8') and (struct(_col0,_col1)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -176,7 +183,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) + predicate: (((((key = '0') or (key = '1') or (key = '2') or (key = '3') or (key = '4') or (key = '5') or (key = '6') or (key = '7') or (key = '8') or (key = '9') or (key = '10')) and ((value = '8') or (value = '5') or (value = '6') or (value = '1') or (value = '3'))) and (value) IN ('1', '3', '5', '6', '8')) and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index 55edd90..648cd07 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -165,7 +165,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + predicate: (((key = 1) or (key = 2)) and (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09'))) (type: boolean) Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) @@ -271,8 +271,8 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [pcr_t1] - /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-08 [$hdt$_0:pcr_t1] + /pcr_t1/ds=2000-04-09 [$hdt$_0:pcr_t1] Needs Tagging: false Reduce Operator Tree: Select Operator
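
The pcs_t1 plans above illustrate the intended payoff of the new PartitionColumnsSeparator pass: even when the IN clause mixes a partition column with an arbitrary expression, as in struct(ds, key+key), the explain output lists only the ds=2000-04-08 and ds=2000-04-09 partitions as inputs, so a predicate restricted to the partition column evidently reached the partition pruner. The following sketch is a simplified, Hive-independent model of that separation step; it uses plain strings in place of Hive's expression descriptors, and every class and method name in it is hypothetical, not part of the Hive codebase.

    // Illustrative sketch only, NOT the Hive implementation. It models the core
    // idea: from IN(STRUCT(c1..cn), STRUCT(v11..v1n), ...) derive a residual
    // predicate restricted to partition columns, which a pruner could evaluate
    // per partition.
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Set;

    public class StructInSeparatorSketch {

        /** Positions in the struct whose column is a partition column. */
        static List<Integer> partitionPositions(List<String> structCols, Set<String> partCols) {
            List<Integer> positions = new ArrayList<>();
            for (int i = 0; i < structCols.size(); i++) {
                if (partCols.contains(structCols.get(i))) {
                    positions.add(i);
                }
            }
            return positions;
        }

        /** Project each constant struct down to the partition-column positions. */
        static List<List<String>> project(List<List<String>> constStructs, List<Integer> positions) {
            List<List<String>> projected = new ArrayList<>();
            for (List<String> row : constStructs) {
                List<String> slim = new ArrayList<>();
                for (int pos : positions) {
                    slim.add(row.get(pos));
                }
                projected.add(slim);
            }
            return projected;
        }

        static List<String> positionsToNames(List<String> cols, List<Integer> positions) {
            List<String> names = new ArrayList<>();
            for (int pos : positions) {
                names.add(cols.get(pos));
            }
            return names;
        }

        public static void main(String[] args) {
            // struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2))
            List<String> structCols = Arrays.asList("ds", "key");
            List<List<String>> constStructs = Arrays.asList(
                Arrays.asList("'2000-04-08'", "1"),
                Arrays.asList("'2000-04-09'", "2"));
            Set<String> partCols = Set.of("ds");

            List<Integer> positions = partitionPositions(structCols, partCols);
            List<List<String>> pruned = project(constStructs, positions);
            // Prints: (ds) IN [['2000-04-08'], ['2000-04-09']]
            System.out.println("(" + String.join(",", positionsToNames(structCols, positions))
                + ") IN " + pruned);
        }
    }

Note that the plans keep the original struct-IN predicate ANDed into the filter, so the separated predicate only has to be implied by the original (a superset filter); over-approximating in this way is safe for pruning and cannot change query results.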
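The pointlookup.q.out and pointlookup2.q.out hunks show a related visible effect in the updated golden files: per-column disjunctions such as ((key = '0') or (key = '1') or ...) and (value = '8') or ... are now ANDed in front of the original multi-column point-lookup predicate. A minimal sketch of how such per-column pre-filters can be derived from the constant tuples follows; again, the names are hypothetical and this is not Hive's implementation.

    // Sketch: derive per-column pre-filters from the constant tuples of a
    // multi-column point lookup. The conjunction of the per-column IN lists is
    // implied by the original predicate, so ANDing it on (as the plans above
    // do) never changes results and can enable pruning or cheap early filtering.
    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;

    public class PerColumnFilterSketch {
        public static void main(String[] args) {
            // (key, value) IN { ('0','8'), ('1','5'), ('2','6') }
            List<String[]> tuples = Arrays.asList(
                new String[]{"0", "8"},
                new String[]{"1", "5"},
                new String[]{"2", "6"});

            Set<String> keys = new LinkedHashSet<>();
            Set<String> values = new LinkedHashSet<>();
            for (String[] t : tuples) {
                keys.add(t[0]);
                values.add(t[1]);
            }
            // Prints: key IN [0, 1, 2] and value IN [8, 5, 6]
            System.out.println("key IN " + keys + " and value IN " + values);
        }
    }
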