diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index bf48f69..c825eb0 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1253,8 +1253,8 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
         "Whether to transform OR clauses in Filter operators into IN clauses"),
     HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31,
         "Minimum number of OR clauses needed to transform into IN clauses"),
-    HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true,
-        "Extract partial expressions when optimizing point lookup IN clauses"),
+    HIVEPARTITIONCOLUMNSEPARATOR("hive.optimize.partition.columns.separate", true,
+        "Extract partition columns from IN clauses"),
     // Constant propagation optimizer
     HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"),
     HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 439f616..4719159 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -73,11 +73,11 @@ public void initialize(HiveConf hiveConf) {
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
       final int min = HiveConf.getIntVar(hiveConf,
           HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
-      final boolean extract = HiveConf.getBoolVar(hiveConf,
-          HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT);
-      final boolean testMode = HiveConf.getBoolVar(hiveConf,
-          HiveConf.ConfVars.HIVE_IN_TEST);
-      transformations.add(new PointLookupOptimizer(min, extract, testMode));
+      transformations.add(new PointLookupOptimizer(min));
+    }
+
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPARTITIONCOLUMNSEPARATOR)) {
+      transformations.add(new PartitionColumnsSeparator());
     }
 
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
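The effect of the new pass registered above is easiest to see at the predicate
level. A minimal HiveQL sketch, using the pcs_t1 table defined in the tests
further below (key int, value string, partitioned by ds string):

  -- ds is a partition column, key is not, so the struct IN clause alone
  -- cannot be used for partition pruning:
  SELECT ds FROM pcs_t1
  WHERE struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2));

  -- PartitionColumnsSeparator conjoins a partition-column-only IN clause:
  --   struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2))
  --   AND struct(ds) IN (struct('2000-04-08'), struct('2000-04-09'))
  -- The added conjunct is what lets the partition pruner skip ds='2000-04-10'.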
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java
new file mode 100644
index 0000000..07a59c2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java
@@ -0,0 +1,443 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.TypeRule;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+/**
+ * This optimization will take a Filter expression and, if its predicate contains
+ * an IN operator whose children are constant structs or structs containing constant fields,
+ * it will try to generate a predicate with IN clauses containing only partition columns.
+ * This predicate is in turn used by the partition pruner to prune the partitions that are
+ * not part of the original IN(STRUCT(..)..) predicate.
+ */
+public class PartitionColumnsSeparator implements Transform {
+
+  private static final Log LOG = LogFactory.getLog(PartitionColumnsSeparator.class);
+  private static final String IN_UDF =
+      GenericUDFIn.class.getAnnotation(Description.class).name();
+  private static final String STRUCT_UDF =
+      GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    // 1. Trigger transformation
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
+        new StructInTransformer());
+
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+    GraphWalker ogw = new ForwardWalker(disp);
+
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    return pctx;
+  }
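+
+  /*
+   * The rewrite is driven by two walks: the operator walk above fires
+   * StructInTransformer once per FilterOperator, and StructInTransformer in turn
+   * starts a second, expression-level walk (generateInClauses) that visits each
+   * ExprNodeGenericFuncDesc in the predicate at most once (PreOrderOnceWalker)
+   * and rewrites the qualifying IN(STRUCT(..)..) sub-expressions it finds.
+   */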
+
+  private class StructInTransformer implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator filterOp = (FilterOperator) nd;
+      ExprNodeDesc predicate = filterOp.getConf().getPredicate();
+
+      // Generate the partition pruning predicate as separate IN clauses that
+      // contain only partition columns, and AND them with the original predicate.
+      ExprNodeDesc newPredicate = generateInClauses(predicate);
+      if (newPredicate != null) {
+        // Replace the predicate of the current FilterOperator with the ANDed predicate
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Generated new predicate with IN clause: " + newPredicate);
+        }
+        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(2);
+        subExpr.add(predicate);
+        subExpr.add(newPredicate);
+        ExprNodeGenericFuncDesc newFilterPredicate = new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo,
+            FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+        filterOp.getConf().setPredicate(newFilterPredicate);
+      }
+
+      return null;
+    }
+
+    private ExprNodeDesc generateInClauses(ExprNodeDesc predicate) throws SemanticException {
+      Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+      exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new StructInExprProcessor());
+
+      // The dispatcher fires the processor corresponding to the closest matching
+      // rule and passes the context along
+      Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null);
+      GraphWalker egw = new PreOrderOnceWalker(disp);
+
+      List<Node> startNodes = new ArrayList<Node>();
+      startNodes.add(predicate);
+
+      HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+      egw.startWalking(startNodes, outputMap);
+      return (ExprNodeDesc) outputMap.get(predicate);
+    }
+  }
+
+  private class StructInExprProcessor implements NodeProcessor {
+
+    // Cache of whether an expression node contains only partition columns,
+    // virtual columns or constants, keyed by node identity.
+    private Map<ExprNodeDesc, Boolean> exprNodeToPartOrVirtualColExpr =
+        new IdentityHashMap<ExprNodeDesc, Boolean>();
+
+    // Returns true if the expression is built only from partition columns,
+    // virtual columns and constants; results are memoized in the cache above.
+    private boolean exprContainsOnlyPartitionColOrVirtualColOrConstants(ExprNodeDesc en) {
+      if (en == null) {
+        return true;
+      }
+      if (exprNodeToPartOrVirtualColExpr.containsKey(en)) {
+        return exprNodeToPartOrVirtualColExpr.get(en);
+      }
+      if (en instanceof ExprNodeColumnDesc) {
+        boolean ret = ((ExprNodeColumnDesc) en).getIsPartitionColOrVirtualCol();
+        exprNodeToPartOrVirtualColExpr.put(en, ret);
+        return ret;
+      }
+      if (en instanceof ExprNodeConstantDesc || en.getChildren() == null) {
+        exprNodeToPartOrVirtualColExpr.put(en, true);
+        return true;
+      }
+      for (ExprNodeDesc cn : en.getChildren()) {
+        if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(cn)) {
+          exprNodeToPartOrVirtualColExpr.put(en, false);
+          return false;
+        }
+      }
+      exprNodeToPartOrVirtualColExpr.put(en, true);
+      return true;
+    }
+
+    // Returns the expression as an IN clause if it is an IN generic UDF call,
+    // null otherwise.
+    private ExprNodeGenericFuncDesc getInExprNodeFromComplexExpression(ExprNodeDesc en) {
+      if (en == null) {
+        return null;
+      }
+      if (en instanceof ExprNodeColumnDesc || en instanceof ExprNodeConstantDesc ||
+          en.getChildren() == null) {
+        return null;
+      }
+      if (en instanceof ExprNodeGenericFuncDesc &&
+          ((ExprNodeGenericFuncDesc) en).getGenericUDF() instanceof GenericUDFIn) {
+        return (ExprNodeGenericFuncDesc) en;
+      }
+      return null;
+    }
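+
+    /*
+     * Worked example (see pcs.q): for the predicate
+     *   struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2))
+     * where ds is a partition column and key is not, process() below groups the
+     * struct fields by table alias and emits
+     *   struct(ds) IN (struct('2000-04-08'), struct('2000-04-09'))
+     * which is ANDed with the original predicate. The partition pruner can then
+     * drop the ds='2000-04-10' partition, and the partition condition remover
+     * later folds the generated clause back to true.
+     */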
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ExprNodeGenericFuncDesc fd = getInExprNodeFromComplexExpression((ExprNodeDesc) nd);
+
+      /***************************************************************************************\
+       BEGIN : Early terminations for Partition Column Separator
+      /***************************************************************************************/
+      // 1. If it is not an IN operator, we bail out.
+      if (fd == null || !(fd.getGenericUDF() instanceof GenericUDFIn)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd + ", not an IN operator");
+        }
+        return null;
+      }
+
+      // 2. The first child of the IN operator has to be a STRUCT, else we bail out.
+      List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < 2 || !(children.get(0) instanceof ExprNodeGenericFuncDesc) ||
+          !(((ExprNodeGenericFuncDesc) children.get(0)).getGenericUDF()
+              instanceof GenericUDFStruct)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd + ", children size " +
+              children.size() + ", child expression : " + children.get(0).getExprString());
+        }
+        return null;
+      }
+
+      // 3. See if there are partition columns in the struct, if not bail out.
+      boolean isValidOptimization = false;
+      for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) children.get(0)).getChildren()) {
+        // Check if the current field expression is a non-constant expression built
+        // only from partition columns, virtual columns and constants.
+        // If yes, this filter predicate is a candidate for this optimization.
+        if (!(ed instanceof ExprNodeConstantDesc) &&
+            exprContainsOnlyPartitionColOrVirtualColOrConstants(ed)) {
+          isValidOptimization = true;
+          break;
+        }
+      }
+      if (!isValidOptimization) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd +
+              ", there are no partition columns in struct fields");
+        }
+        return null;
+      }
+      /***************************************************************************************\
+       END : Early terminations for Partition Column Separator
+      /***************************************************************************************/
+
+      Map<String, List<ExprNodeDesc>> tableAliasToExprNodeDesc =
+          new HashMap<String, List<ExprNodeDesc>>();
+      Map<String, List<String>> tableAliasToPartName = new HashMap<String, List<String>>();
+      Map<String, List<TypeInfo>> tableAliasToTypeInfo = new HashMap<String, List<TypeInfo>>();
+      ExprNodeGenericFuncDesc originalStructDesc = ((ExprNodeGenericFuncDesc) children.get(0));
+      List<ExprNodeDesc> originalDescChildren = originalStructDesc.getChildren();
+
+      // Set the first row of the IN clauses, which is the struct field metadata,
+      // grouped by table alias.
+      for (ExprNodeDesc en : originalDescChildren) {
+        if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+          List<ExprNodeDesc> exprNodeDescList;
+          List<String> partNameList;
+          List<TypeInfo> typeInfoList;
+          String tabAlias = en instanceof ExprNodeColumnDesc ?
+              ((ExprNodeColumnDesc) en).getTabAlias() : en.getExprString();
+
+          if (!tableAliasToExprNodeDesc.containsKey(tabAlias)) {
+            exprNodeDescList = new ArrayList<ExprNodeDesc>();
+            exprNodeDescList.add(en);
+            tableAliasToExprNodeDesc.put(tabAlias, exprNodeDescList);
+
+            partNameList = new ArrayList<String>();
+            partNameList.add(en.getName());
+            tableAliasToPartName.put(tabAlias, partNameList);
+
+            typeInfoList = new ArrayList<TypeInfo>();
+            typeInfoList.add(en.getTypeInfo());
+            tableAliasToTypeInfo.put(tabAlias, typeInfoList);
+          } else {
+            exprNodeDescList = tableAliasToExprNodeDesc.get(tabAlias);
+            exprNodeDescList.add(en);
+
+            partNameList = tableAliasToPartName.get(tabAlias);
+            partNameList.add(en.getName());
+
+            typeInfoList = tableAliasToTypeInfo.get(tabAlias);
+            typeInfoList.add(en.getTypeInfo());
+          }
+        }
+      }
+
+      Map<String, List<ExprNodeDesc>> tableAliasToInStruct =
+          new HashMap<String, List<ExprNodeDesc>>();
+
+      for (Map.Entry<String, List<ExprNodeDesc>> entry : tableAliasToExprNodeDesc.entrySet()) {
+        String currTabAlias = entry.getKey();
+        List<ExprNodeDesc> currStructExprList = new ArrayList<ExprNodeDesc>();
+        currStructExprList.add(new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.getStructTypeInfo(tableAliasToPartName.get(currTabAlias),
+                tableAliasToTypeInfo.get(currTabAlias)),
+            FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+            entry.getValue()));
+        tableAliasToInStruct.put(currTabAlias, currStructExprList);
+      }
+
+      /** BEGIN FOR LOOP : Convert each row of the IN list to a list of structs. */
+      for (int i = 1; i < children.size(); i++) {
+        // The children better be either constant structs or generic struct UDFs
+        // containing constant values, otherwise we cannot do this optimization.
+        if (!(children.get(i) instanceof ExprNodeConstantDesc ||
+            (children.get(i) instanceof ExprNodeGenericFuncDesc &&
+                ((ExprNodeGenericFuncDesc) children.get(i)).
+                    getGenericUDF() instanceof GenericUDFStruct))) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Partition columns not separated for " + fd +
+                ", child not a constant struct or generic UDF struct " +
+                children.get(i).getExprString());
+          }
+          return null;
+        }
+
+        Map<String, List<ExprNodeDesc>> tabAliasToConstPartColumns =
+            new HashMap<String, List<ExprNodeDesc>>();
+        List<ExprNodeDesc> constPartColumns = null;
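+
+        /*
+         * Two row shapes are accepted here. A row that has already been folded to
+         * a constant (e.g. struct('2000-04-08', 1)) arrives as a single
+         * ExprNodeConstantDesc whose value is the List of field values (Case 1
+         * below). An unfolded row arrives as a GenericUDFStruct call whose
+         * children are individual constant nodes (Case 2 below).
+         */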
+
+        // Case 1 : The struct is a constant struct, in which case the value should be
+        // a list of field values.
+        if (children.get(i) instanceof ExprNodeConstantDesc) {
+          // Sanity check. If the constant values do not match the metadata of the struct,
+          // return null.
+          if (!(((ExprNodeConstantDesc) (children.get(i))).getValue() instanceof List) ||
+              ((List<?>) ((ExprNodeConstantDesc) (children.get(i))).
+                  getValue()).size() != originalDescChildren.size()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Partition columns not separated for " + fd + ", " +
+                  children.get(i) + ", object value is : " +
+                  ((ExprNodeConstantDesc) (children.get(i))).getValue());
+            }
+            return null;
+          }
+          List<?> cnCols = (List<?>) (((ExprNodeConstantDesc)
+              (children.get(i))).getValue());
+
+          // For each field in the struct, add it to the constant partition columns
+          // of its table alias; non-partition fields are skipped.
+          for (int j = 0; j < originalDescChildren.size(); j++) {
+            ExprNodeDesc en = originalDescChildren.get(j);
+
+            if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+              String currTabAlias = en instanceof ExprNodeColumnDesc ?
+                  ((ExprNodeColumnDesc) en).getTabAlias() : en.getExprString();
+
+              if (currTabAlias == null) {
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug("Partition columns not separated for " + fd + ", " +
+                      children.get(i) + ", internal error for unknown table name for column : " +
+                      j);
+                }
+                return null;
+              }
+              if (!tabAliasToConstPartColumns.containsKey(currTabAlias)) {
+                constPartColumns = new ArrayList<ExprNodeDesc>();
+                constPartColumns.add(new ExprNodeConstantDesc(cnCols.get(j)));
+                tabAliasToConstPartColumns.put(currTabAlias, constPartColumns);
+              } else {
+                constPartColumns = tabAliasToConstPartColumns.get(currTabAlias);
+                constPartColumns.add(new ExprNodeConstantDesc(cnCols.get(j)));
+              }
+            }
+          }
+        } else {
+          List<ExprNodeDesc> cnChildren = ((ExprNodeGenericFuncDesc) children.get(i)).
+              getChildren();
+          // Case 2 : The struct is a generic UDF struct, in which case the children size
+          // should match the number of struct fields and the children of the struct
+          // should be constant nodes.
+          if (cnChildren.size() != originalDescChildren.size()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Partition columns not separated for " + fd + ", " +
+                  " child struct size does not match with parent struct size " +
+                  cnChildren.size() + ", " + originalDescChildren.size());
+            }
+            return null;
+          }
+
+          // For each of the struct fields of the current row, add it to the corresponding struct.
+          for (int j = 0; j < originalDescChildren.size(); j++) {
+            // The fields of the struct better be constant nodes, otherwise bail out.
+            if (!(cnChildren.get(j) instanceof ExprNodeConstantDesc)) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Partition columns not separated for " + fd + ", " +
+                    " struct field not a constant type for " + cnChildren.get(j).getExprString());
+              }
+              return null;
+            }
+            ExprNodeDesc en = originalDescChildren.get(j);
+
+            if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+              String currTabAlias = en instanceof ExprNodeColumnDesc ?
+                  ((ExprNodeColumnDesc) en).getTabAlias() : en.getExprString();
+              if (!tabAliasToConstPartColumns.containsKey(currTabAlias)) {
+                constPartColumns = new ArrayList<ExprNodeDesc>();
+                constPartColumns.add(cnChildren.get(j));
+                tabAliasToConstPartColumns.put(currTabAlias, constPartColumns);
+              } else {
+                constPartColumns = tabAliasToConstPartColumns.get(currTabAlias);
+                constPartColumns.add(cnChildren.get(j));
+              }
+            }
+          }
+        }
+
+        // Convert the current row into per-table structs.
+        // Consider T1 partitioned by A, B; T2 partitioned by C; T3 partitioned by D,
+        // i.e. : STRUCT(T1.A, T1.B, T2.C, T3.D) => {STRUCT(T1.A, T1.B), STRUCT(T2.C), STRUCT(T3.D)}
+        for (Map.Entry<String, List<ExprNodeDesc>> entry : tabAliasToConstPartColumns.entrySet()) {
+          String currTableName = entry.getKey();
+          List<ExprNodeDesc> currExprNodeList = tableAliasToInStruct.get(currTableName);
+
+          currExprNodeList.add(new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.getStructTypeInfo(tableAliasToPartName.get(currTableName),
+                  tableAliasToTypeInfo.get(currTableName)),
+              FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+              entry.getValue()));
+        }
+      }
+      /** END FOR LOOP : Convert each row of the IN list to a list of structs. */
+
+      // Sanity check before we return, to avoid returning an empty expression.
+      if (tableAliasToInStruct.size() == 0) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd +
+              ", internal error for zero IN clauses");
+        }
+        return null;
+      }
+
+      final List<ExprNodeDesc> subExpr =
+          new ArrayList<ExprNodeDesc>(originalDescChildren.size() + 1);
+
+      for (Map.Entry<String, List<ExprNodeDesc>> entry : tableAliasToInStruct.entrySet()) {
+        subExpr.add(new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo, FunctionRegistry.
+                getFunctionInfo(IN_UDF).getGenericUDF(), entry.getValue()));
+      }
+
+      // If there is only one table alias, return its IN clause directly
+      if (subExpr.size() == 1) {
+        return subExpr.get(0);
+      }
+      // Otherwise return the per-table IN clauses, which contain only partition
+      // columns, concatenated with the AND operator
+      return new ExprNodeGenericFuncDesc(
+          TypeInfoFactory.booleanTypeInfo,
+          FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+    }
+  }
+}
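The new transform is gated by the configuration key added above. A minimal way to
exercise it, mirroring the pcs.q test later in this patch:

  set hive.optimize.partition.columns.separate=true;
  explain extended
  select ds from pcs_t1
  where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));

In the resulting plan only the matching partitions appear under
"Truncated Path -> Alias"; the generated partition-only IN clause is subsequently
folded away by the partition condition remover (see the PcrExprProcFactory change
below), so the final Filter Operator still shows only the original predicate.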
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
index d83636d..289d775 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java
@@ -18,14 +18,10 @@
 package org.apache.hadoop.hive.ql.optimizer;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.Stack;
 
 import org.apache.calcite.util.Pair;
@@ -50,18 +46,15 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ImmutableSortedSet;
 import com.google.common.collect.ListMultimap;
 
 /**
@@ -78,48 +71,14 @@
       GenericUDFIn.class.getAnnotation(Description.class).name();
   private static final String STRUCT_UDF =
       GenericUDFStruct.class.getAnnotation(Description.class).name();
-  private static final String AND_UDF =
-      GenericUDFOPAnd.class.getAnnotation(Description.class).name();
-
   // these are closure-bound for all the walkers in context
   public final int minOrExpr;
-  public final boolean extract;
-  public final boolean testMode;
 
   /*
    * Pass in configs and pre-create a parse context
    */
-  public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) {
+  public PointLookupOptimizer(final int min) {
     this.minOrExpr = min;
-    this.extract = extract;
-    this.testMode = testMode;
-  }
-
-  // Hash Set iteration isn't ordered, but force string sorted order
-  // to get a consistent test run.
-  private Collection<ExprNodeDescEqualityWrapper> sortForTests(
-      Set<ExprNodeDescEqualityWrapper> valuesExpr) {
-    if (!testMode) {
-      // normal case - sorting is wasted for an IN()
-      return valuesExpr;
-    }
-    final Collection<ExprNodeDescEqualityWrapper> sortedValues;
-
-    sortedValues = ImmutableSortedSet.copyOf(
-        new Comparator<ExprNodeDescEqualityWrapper>() {
-          @Override
-          public int compare(ExprNodeDescEqualityWrapper w1,
-              ExprNodeDescEqualityWrapper w2) {
-            // fail if you find nulls (this is a test-code section)
-            if (w1.equals(w2)) {
-              return 0;
-            }
-            return w1.getExprNodeDesc().getExprString()
-                .compareTo(w2.getExprNodeDesc().getExprString());
-          }
-        }, valuesExpr);
-
-    return sortedValues;
   }
 
   @Override
@@ -152,9 +111,6 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       if (LOG.isDebugEnabled()) {
        LOG.debug("Generated new predicate with IN clause: " + newPredicate);
       }
-      if (!extract) {
-        filterOp.getConf().setOrigPredicate(predicate);
-      }
       filterOp.getConf().setPredicate(newPredicate);
     }
 
@@ -326,50 +282,6 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
           FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren);
 
-      if (extract && columns.size() > 1) {
-        final List<ExprNodeDesc> subExpr = new ArrayList<ExprNodeDesc>(columns.size() + 1);
-
-        // extract pre-conditions for the tuple expressions
-        // (a,b) IN ((1,2),(2,3)) ->
-        //          ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3))
-
-        for (String keyString : columnConstantsMap.keySet()) {
-          final Set<ExprNodeDescEqualityWrapper> valuesExpr =
-              new HashSet<ExprNodeDescEqualityWrapper>(children.size());
-          final List<Pair<ExprNodeColumnDesc, ExprNodeConstantDesc>> partial =
-              columnConstantsMap.get(keyString);
-          for (int i = 0; i < children.size(); i++) {
-            Pair<ExprNodeColumnDesc, ExprNodeConstantDesc> columnConstant = partial
-                .get(i);
-            valuesExpr
-                .add(new ExprNodeDescEqualityWrapper(columnConstant.right));
-          }
-          ExprNodeColumnDesc lookupCol = partial.get(0).left;
-          // generate a partial IN clause, if the column is a partition column
-          if (lookupCol.getIsPartitionColOrVirtualCol()
-              || valuesExpr.size() < children.size()) {
-            // optimize only nDV reductions
-            final List<ExprNodeDesc> inExpr = new ArrayList<ExprNodeDesc>();
-            inExpr.add(lookupCol);
-            for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) {
-              inExpr.add(value.getExprNodeDesc());
-            }
-            subExpr.add(new ExprNodeGenericFuncDesc(
-                TypeInfoFactory.booleanTypeInfo, FunctionRegistry
-                    .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr));
-          }
-        }
-        // loop complete, inspect the sub expressions generated
-        if (subExpr.size() > 0) {
-          // add the newPredicate to the end & produce an AND clause
-          subExpr.add(newPredicate);
-          newPredicate = new ExprNodeGenericFuncDesc(
-              TypeInfoFactory.booleanTypeInfo, FunctionRegistry
-                  .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
-        }
-        // else, newPredicate is unmodified
-      }
-
       return newPredicate;
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index 825938a..3f3f88d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -48,9 +48,12 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /**
@@ -364,6 +367,40 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         return getResultWrapFromResults(results, fd, newNodeOutputs);
       }
       return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs));
+    } else if (fd.getGenericUDF() instanceof GenericUDFIn) {
+      List<ExprNodeDesc> children = fd.getChildren();
+      boolean removePredElem = false;
+      ExprNodeDesc lhs = children.get(0);
+      // If this is a single column and it is a partition column, this
+      // predicate is a candidate for removal.
+      if (lhs instanceof ExprNodeColumnDesc &&
+          ((ExprNodeColumnDesc) lhs).getIsPartitionColOrVirtualCol()) {
+        removePredElem = true;
+      } else if (lhs instanceof ExprNodeGenericFuncDesc) {
+        // Make sure that the generic udf is deterministic
+        if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs)
+            .getGenericUDF())) {
+          boolean hasOnlyPartCols = true;
+          for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) {
+            // Check if the current field expression is a partition column or a
+            // virtual column.
+            // If yes, this filter predicate is a candidate for this optimization.
+            if (!(ed instanceof ExprNodeColumnDesc &&
+                ((ExprNodeColumnDesc) ed).getIsPartitionColOrVirtualCol())) {
+              hasOnlyPartCols = false;
+              break;
+            }
+          }
+          removePredElem = hasOnlyPartCols;
+        }
+      }
+
+      // If removePredElem is set to true, return true as this is a potential candidate
+      // for the partition condition remover. Else, set the WalkState for this node to unknown.
+      return removePredElem ?
+          new NodeInfoWrapper(WalkState.TRUE, null,
+              new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) :
+          new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs));
     } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
       // If it's a non-deterministic UDF, set unknown to true
       return new NodeInfoWrapper(WalkState.UNKNOWN, null,
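This PcrExprProcFactory extension is the cleanup half of the feature: once the
partition pruner has consumed the separator's generated clause, PCR can fold it
away. A sketch of the interaction, again on the pcs.q example:

  select ds from pcs_t1
  where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));

  -- PartitionColumnsSeparator adds:
  --   and struct(ds) in (struct('2000-04-08'), struct('2000-04-09'))
  -- The pruner drops ds='2000-04-10'; for every partition that survives, the
  -- added clause is true by construction, so PCR replaces it with the constant
  -- TRUE and only the original struct IN predicate remains in the final plan.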
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
index 7262164..fd51628 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
@@ -55,8 +55,7 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
       TableScanOperator top) throws SemanticException, UDFArgumentException {
     OpWalkerCtx owc = (OpWalkerCtx) procCtx;
     // Otherwise this is not a sampling predicate and we need to
-    ExprNodeDesc predicate = fop.getConf().getOrigPredicate();
-    predicate = predicate == null ? fop.getConf().getPredicate() : predicate;
+    ExprNodeDesc predicate = fop.getConf().getPredicate();
     String alias = top.getConf().getAlias();
 
     // Generate the partition pruning predicate
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
index 6a31689..ccc4bb4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
@@ -79,7 +79,6 @@ public String toString() {
   private static final long serialVersionUID = 1L;
   private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate;
-  private transient ExprNodeDesc origPredicate;
   private boolean isSamplingPred;
   private transient SampleDesc sampleDescr;
   //Is this a filter that should perform a comparison for sorted searches
@@ -151,14 +150,6 @@ public void setSortedFilter(boolean isSortedFilter) {
     this.isSortedFilter = isSortedFilter;
   }
 
-  public void setOrigPredicate(ExprNodeDesc origPredicate) {
-    this.origPredicate = origPredicate;
-  }
-
-  public ExprNodeDesc getOrigPredicate() {
-    return origPredicate;
-  }
-
   /**
   * Some filters are generated or implied, which means it is not in the query.
   * It is added by the analyzer. For example, when we do an inner join, we add
diff --git a/ql/src/test/queries/clientpositive/pcs.q b/ql/src/test/queries/clientpositive/pcs.q
new file mode 100644
index 0000000..4b35a4d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pcs.q
@@ -0,0 +1,66 @@
+drop table pcs_t1;
+drop table pcs_t2;
+
+create table pcs_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+analyze table pcs_t1 partition(ds) compute statistics;
+analyze table pcs_t1 partition(ds) compute statistics for columns;
+
+set hive.optimize.point.lookup = true;
+set hive.optimize.point.lookup.min = 1;
+
+explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds;
+select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds;
+
+set hive.optimize.point.lookup = false;
+set hive.optimize.partition.columns.separate=true;
+set hive.optimize.ppd=true;
+
+explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+
+explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4));
+select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4));
+
+explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08'));
+
+select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08'));
+
+explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+
+explain select lag(key) over (partition by key) as c1
+from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select lag(key) over (partition by key) as c1
+from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+
+EXPLAIN EXTENDED
+SELECT * FROM (
+  SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+  UNION ALL
+  SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+) A
+WHERE A.ds = '2008-04-08'
+SORT BY A.key, A.value, A.ds;
+
+SELECT * FROM (
+  SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+  UNION ALL
+  SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+) A
+WHERE A.ds = '2008-04-08'
+SORT BY A.key, A.value, A.ds;
+
+explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11));
+select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11));
+
+explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3));
+
+explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3));
+select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3));
+
+explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0);
+select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0);
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q
index 1aef2ef..c460f39 100644
--- a/ql/src/test/queries/clientpositive/pointlookup.q
+++ b/ql/src/test/queries/clientpositive/pointlookup.q
@@ -18,8 +18,7 @@ WHERE
 
 set hive.optimize.point.lookup.min=3;
-set hive.optimize.point.lookup.extract=false;
-
+set hive.optimize.partition.columns.separate=false;
 explain
 SELECT key
 FROM src
@@ -38,8 +37,7 @@ WHERE
   AND value = '3'))
 ;
 
-set hive.optimize.point.lookup.extract=true;
-
+set hive.optimize.partition.columns.separate=true;
 explain
 SELECT key
 FROM src
diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q
index 31bebbb..94e99fb 100644
--- a/ql/src/test/queries/clientpositive/pointlookup2.q
+++ b/ql/src/test/queries/clientpositive/pointlookup2.q
@@ -14,7 +14,7 @@ from pcr_t1
 insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2;
 
 set hive.optimize.point.lookup.min=2;
-set hive.optimize.point.lookup.extract=true;
+set hive.optimize.partition.columns.separate=true;
 
 explain extended
 select key, value, ds
diff --git a/ql/src/test/queries/clientpositive/pointlookup3.q b/ql/src/test/queries/clientpositive/pointlookup3.q
index 3daa94b..79e7348 100644
--- a/ql/src/test/queries/clientpositive/pointlookup3.q
+++ b/ql/src/test/queries/clientpositive/pointlookup3.q
@@ -6,7 +6,7 @@ insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') sel
 insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key;
 
 set hive.optimize.point.lookup.min=2;
-set hive.optimize.point.lookup.extract=true;
+set hive.optimize.partition.columns.separate=true;
 
 explain extended
 select key, value, ds1, ds2
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
index eca29df..ddb05e2 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out
@@ -153,7 +153,7 @@ STAGE PLANS:
           TableScan
             alias: acid
             Filter Operator
-              predicate: (key = 'foo') (type: boolean)
+              predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean)
              Select Operator
                 expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
                 outputColumnNames: _col0, _col3
@@ -390,7 +390,7 @@ STAGE PLANS:
           TableScan
             alias: acid
             Filter Operator
-              predicate: (key = 'foo') (type: boolean)
+              predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean)
              Select Operator
                 expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), ds (type: string)
                 outputColumnNames: _col0, _col3
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
new file mode 100644
index 0000000..4084065
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -0,0 +1,2242 @@
+PREHOOK: query: drop table pcs_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcs_t1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table pcs_t2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcs_t2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@pcs_t1
+POSTHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@pcs_t1
+PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcs_t1@ds=2000-04-08
+POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcs_t1@ds=2000-04-09
+POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-09
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@pcs_t1@ds=2000-04-10
+POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-10
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table pcs_t1 partition(ds) compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcs_t1
+PREHOOK: Input: default@pcs_t1@ds=2000-04-08
+PREHOOK: Input: default@pcs_t1@ds=2000-04-09
+PREHOOK: Input: default@pcs_t1@ds=2000-04-10
+PREHOOK: Output: default@pcs_t1
+PREHOOK: Output: default@pcs_t1@ds=2000-04-08
+PREHOOK: Output: default@pcs_t1@ds=2000-04-09
+PREHOOK: Output: default@pcs_t1@ds=2000-04-10
+POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcs_t1
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-09
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-10
+POSTHOOK: Output: default@pcs_t1
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-09
+POSTHOOK: Output: default@pcs_t1@ds=2000-04-10
+PREHOOK: query: analyze table pcs_t1 partition(ds) compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcs_t1
+PREHOOK: Input: default@pcs_t1@ds=2000-04-08
+PREHOOK: Input: default@pcs_t1@ds=2000-04-09
+PREHOOK: Input: default@pcs_t1@ds=2000-04-10
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcs_t1
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-09
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-10
+#### A masked pattern was here ####
+PREHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcs_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcs_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), value (type: string), ds (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                  sort order: +++
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcs_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcs_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcs_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcs_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcs_t1
+            name: default.pcs_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcs_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcs_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcs_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcs_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcs_t1
+            name: default.pcs_t1
+      Truncated Path -> Alias:
+        /pcs_t1/ds=2000-04-08 [pcs_t1]
+        /pcs_t1/ds=2000-04-09 [pcs_t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types int:string:string
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcs_t1
+PREHOOK: Input: default@pcs_t1@ds=2000-04-08
+PREHOOK: Input: default@pcs_t1@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcs_t1
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-09
+#### A masked pattern was here ####
+2	val_2	2000-04-09
+PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcs_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         TOK_FUNCTION
+            in
+            TOK_FUNCTION
+               struct
+               TOK_TABLE_OR_COL
+                  ds
+               TOK_TABLE_OR_COL
+                  key
+            TOK_FUNCTION
+               struct
+               '2000-04-08'
+               1
+            TOK_FUNCTION
+               struct
+               '2000-04-09'
+               2
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcs_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ds (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0
+                        columns.types string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcs_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcs_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcs_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcs_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcs_t1
+            name: default.pcs_t1
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcs_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcs_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcs_t1
+                partition_columns ds
+                partition_columns.types string
+                serialization.ddl struct pcs_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcs_t1
+            name: default.pcs_t1
+      Truncated Path -> Alias:
+        /pcs_t1/ds=2000-04-08 [pcs_t1]
+        /pcs_t1/ds=2000-04-09 [pcs_t1]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcs_t1
+PREHOOK: Input: default@pcs_t1@ds=2000-04-08
+PREHOOK: Input: default@pcs_t1@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcs_t1
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcs_t1@ds=2000-04-09
+#### A masked pattern was here ####
+2000-04-09
+PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4))
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4))
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcs_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         TOK_FUNCTION
+            in
+            TOK_FUNCTION
+               struct
+               TOK_TABLE_OR_COL
+                  ds
+               +
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+            TOK_FUNCTION
+               struct
+               '2000-04-08'
+               3
+            TOK_FUNCTION
+               struct
+               '2000-04-09'
+               4
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: pcs_t1
+            Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (struct(ds,(key + 2))) IN (const struct('2000-04-08',3), const struct('2000-04-09',4)) (type: boolean)
+              Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ds (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0
+                        columns.types string
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns key,value
+              columns.comments 
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcs_t1
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              partition_columns.types string
+              rawDataSize 160
+              serialization.ddl struct pcs_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +PREHOOK: query: explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcs_t1 + a + TOK_TABREF + TOK_TABNAME + pcs_t1 + b + = + . + TOK_TABLE_OR_COL + a + ds + . 
+ TOK_TABLE_OR_COL + b + ds + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + pcs_t1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + a + ds + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + ds + TOK_FUNCTION + struct + '2000-04-08' + 1 + '2000-04-09' + TOK_FUNCTION + struct + '2000-04-09' + 2 + '2000-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + 
rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [a, b] + /pcs_t1/ds=2000-04-09 [a, b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 ds (type: string) + 1 ds (type: string) + outputColumnNames: _col0, _col2, _col6, _col8 + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) and ((struct(_col8)) IN (const struct('2000-04-09'), const struct('2000-04-08')) and (struct(_col2)) IN (const struct('2000-04-08'), const struct('2000-04-09')))) (type: boolean) + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +PREHOOK: query: 
explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + key))) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +PREHOOK: query: explain select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE 
Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: lag_window_0 + arguments: _col0 + name: lag + window function: GenericUDAFLagEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lag_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +NULL +PREHOOK: query: EXPLAIN EXTENDED +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNIONALL + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + X + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + X + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + X + ds + . + TOK_TABLE_OR_COL + X + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + Y + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + Y + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + Y + ds + . + TOK_TABLE_OR_COL + Y + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + A + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + . 
+ TOK_TABLE_OR_COL + A + ds + '2008-04-08' + TOK_SORTBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + value + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + sort order: +++ + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + auto parallelism: false + TableScan + alias: y + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + sort order: +++ + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + auto parallelism: false + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +#### A masked pattern was here #### +PREHOOK: query: explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_FUNCTION + when + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + 10 + 20 + TOK_FUNCTION + struct + 10 + TOK_FUNCTION + struct + 11 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((const struct(10)) IN (const struct(10), const struct(11)) and (const struct(10)) IN (const struct(10), const struct(11))) (type: boolean) + Statistics: Num rows: 5 Data size: 
40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +#### A masked pattern was here #### +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + TOK_TABLE_OR_COL + key + TOK_FUNCTION + rand + 100 + TOK_FUNCTION + struct + '2000-04-08' + 1 + 0.2 + TOK_FUNCTION + struct + '2000-04-09' + 2 + 0.3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((struct(ds,key,rand(100))) IN (const struct('2000-04-08',1,0.2), const struct('2000-04-09',2,0.3)) and (struct(rand(100))) IN (const struct(0.2), const struct(0.3))) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition 
values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + or + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 2 + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + true + 
2 + TOK_FUNCTION + struct + false + 3 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) (type: boolean) + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +2000-04-08 +2000-04-09 +2000-04-10 +PREHOOK: query: explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + = + TOK_TABLE_OR_COL + key + 3 + and + TOK_FUNCTION + in + TOK_FUNCTION + struct + or + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 2 + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + true + 2 + TOK_FUNCTION + struct + false + 3 + > + + + TOK_TABLE_OR_COL + key + 5 + 0 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + 
partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = 3) or ((struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) and ((key + 5) > 0))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +2000-04-08 +2000-04-09 +2000-04-10 diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out index 7e19be4..a99b388 100644 --- a/ql/src/test/results/clientpositive/pointlookup.q.out +++ b/ql/src/test/results/clientpositive/pointlookup.q.out @@ -176,15 +176,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
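The join plan above makes the effect of the new transform visible in the residual filter: alongside the original predicate `(struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08'))`, the optimizer has conjoined the weaker, partition-column-only clauses `(struct(_col2)) IN (...)` and `(struct(_col8)) IN (...)`. Because each added clause is implied by the original IN(STRUCT(..)..) predicate, the conjunction is semantically a no-op, but it hands the partition pruner a predicate that mentions only the partition column. A minimal HiveQL sketch of the equivalence, assuming (as for the test table pcs_t1 above) that ds is the sole partition column; the optimizer performs this rewrite on the expression tree, not on the query text:

    -- Query as written in the tests above:
    SELECT ds FROM pcs_t1
    WHERE struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2));

    -- Logically equivalent form after partition-column separation: the added
    -- conjunct references only the partition column ds, so the partition
    -- pruner can evaluate it at compile time, while the original
    -- IN(STRUCT(..)..) predicate still filters individual rows.
    SELECT ds FROM pcs_t1
    WHERE struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2))
      AND struct(ds) IN (struct('2000-04-08'), struct('2000-04-09'));

The pointlookup.q.out hunk shows the complementary side of the patch: with extraction no longer performed by PointLookupOptimizer, the filter keeps only the `(struct(key,value)) IN (...)` predicate, drops the previously derived `(value) IN ('1', '3', '5', '6', '8')` conjunct, and the plan's row estimate grows from 125 to 250 accordingly.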