diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 59b66cd..b5f1e64 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1258,8 +1258,8 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "Whether to transform OR clauses in Filter operators into IN clauses"), HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31, "Minimum number of OR clauses needed to transform into IN clauses"), - HIVEPOINTLOOKUPOPTIMIZEREXTRACT("hive.optimize.point.lookup.extract", true, - "Extract partial expressions when optimizing point lookup IN clauses"), + HIVEPARTITIONCOLUMNSEPARATOR("hive.optimize.partition.columns.separate", true, + "Extract partition columns from IN clauses"), // Constant propagation optimizer HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 7ee5081..6347872 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -84,11 +84,11 @@ public void initialize(HiveConf hiveConf) { if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { final int min = HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); - final boolean extract = HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZEREXTRACT); - final boolean testMode = HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_IN_TEST); - transformations.add(new PointLookupOptimizer(min, extract, testMode)); + transformations.add(new PointLookupOptimizer(min)); + } + + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPARTITIONCOLUMNSEPARATOR)) { + transformations.add(new PartitionColumnsSeparator()); } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java new file mode 100644 index 0000000..f71f37c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java @@ -0,0 +1,525 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.ForwardWalker; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.TypeRule; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +/** + * This optimization will take a Filter expression, and if its predicate contains + * an IN operator whose children are constant structs or structs containing constant fields, + * it will try to generate a predicate with IN clauses containing only partition columns. + * This predicate is in turn used by the partition pruner to prune the partitions that do not + * satisfy the original IN(STRUCT(..)..) predicate. + */ +public class PartitionColumnsSeparator implements Transform { + + private static final Log LOG = LogFactory.getLog(PartitionColumnsSeparator.class); + private static final String IN_UDF = + GenericUDFIn.class.getAnnotation(Description.class).name(); + private static final String STRUCT_UDF = + GenericUDFStruct.class.getAnnotation(Description.class).name(); + private static final String AND_UDF = + GenericUDFOPAnd.class.getAnnotation(Description.class).name(); + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + // 1. Trigger transformation + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), new StructInTransformer()); + + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); + GraphWalker ogw = new ForwardWalker(disp); + + List<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + return pctx; + } + + private class StructInTransformer implements NodeProcessor { + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object...
nodeOutputs) throws SemanticException { + FilterOperator filterOp = (FilterOperator) nd; + ExprNodeDesc predicate = filterOp.getConf().getPredicate(); + + // Generate the list bucketing pruning predicate as 2 separate IN clauses + // containing the partitioning and non-partitioning columns. + ExprNodeDesc newPredicate = generateInClauses(predicate); + if (newPredicate != null) { + // Replace filter in current FIL with new FIL + if (LOG.isDebugEnabled()) { + LOG.debug("Generated new predicate with IN clause: " + newPredicate); + } + final List subExpr = + new ArrayList(2); + subExpr.add(predicate); + subExpr.add(newPredicate); + ExprNodeGenericFuncDesc newFilterPredicate = new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr); + filterOp.getConf().setPredicate(newFilterPredicate); + } + + return null; + } + + private ExprNodeDesc generateInClauses(ExprNodeDesc predicate) throws SemanticException { + Map exprRules = new LinkedHashMap(); + exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new StructInExprProcessor()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null); + GraphWalker egw = new PreOrderOnceWalker(disp); + + List startNodes = new ArrayList(); + startNodes.add(predicate); + + HashMap outputMap = new HashMap(); + egw.startWalking(startNodes, outputMap); + return (ExprNodeDesc) outputMap.get(predicate); + } + } + + /** + * The StructInExprProcessor processes the IN clauses of the following format : + * STRUCT(T1.a, T1.b, T2.b, T2.c) IN (STRUCT(1, 2, 3, 4) , STRUCT(2, 3, 4, 5)) + * where T1.a, T1.b, T2.c are all partition columns and T2.b is a non-partition + * column. The resulting additional predicate generated after + * StructInExprProcessor.process() looks like : + * STRUCT(T1.a, T1.b) IN (STRUCT(1, 2), STRUCT(2, 3)) + * AND + * STRUCT(T2.b) IN (STRUCT(4), STRUCT(5)) + * The additional predicate generated is used to prune the partitions that are + * part of the given query. Once the partitions are pruned, the partition condition + * remover is expected to remove the redundant predicates from the plan. + */ + private class StructInExprProcessor implements NodeProcessor { + + /** TableInfo is populated in PASS 1 of process(). It contains the information required + * to generate an IN clause of the following format: + * STRUCT(T1.a, T1.b) IN (const STRUCT(1, 2), const STRUCT(2, 3)) + * In the above e.g. please note that all elements of the struct come from the same table. + * The populated TableStructInfo is used to generate the IN clause in PASS 2 of process(). + * The table struct information class has the following fields: + * 1. Expression Node Descriptor for the Left Hand Side of the IN clause for the table + * 2. 
2-D List of expression node descriptors which corresponds to the elements of the IN clause + */ + class TableInfo { + List<ExprNodeDesc> exprNodeLHSDescriptor; + List<List<ExprNodeDesc>> exprNodeRHSStructs; + + public TableInfo() { + exprNodeLHSDescriptor = new ArrayList<ExprNodeDesc>(); + exprNodeRHSStructs = new ArrayList<List<ExprNodeDesc>>(); + } + } + + // Mapping from an expression node to whether it is an expression containing only + // partition/virtual columns or constants + private Map<ExprNodeDesc, Boolean> exprNodeToPartOrVirtualColOrConstExpr = + new IdentityHashMap<ExprNodeDesc, Boolean>(); + + /** + * This function iterates through the entire subtree under a given expression node + * and makes sure that the expression contains only constant nodes or + * partition/virtual columns as leaf nodes. + * @param en Expression Node Descriptor for the root node. + * @return true if the subtree rooted under en has only partition/virtual columns or + * constant values as the leaf nodes. Else, return false. + */ + private boolean exprContainsOnlyPartitionColOrVirtualColOrConstants(ExprNodeDesc en) { + if (en == null) { + return true; + } + if (exprNodeToPartOrVirtualColOrConstExpr.containsKey(en)) { + return exprNodeToPartOrVirtualColOrConstExpr.get(en); + } + if (en instanceof ExprNodeColumnDesc) { + boolean ret = ((ExprNodeColumnDesc)en).getIsPartitionColOrVirtualCol(); + exprNodeToPartOrVirtualColOrConstExpr.put(en, ret); + return ret; + } + if (en.getChildren() != null) { + for (ExprNodeDesc cn : en.getChildren()) { + if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(cn)) { + exprNodeToPartOrVirtualColOrConstExpr.put(en, false); + return false; + } + } + } + exprNodeToPartOrVirtualColOrConstExpr.put(en, true); + return true; + } + + + /** + * Check if the expression node satisfies the following: + * Has at least one subexpression containing a partition/virtual column and + * refers to exactly one table alias. + * @param en Expression Node Descriptor + * @return true if there is at least one subexpression with a partition/virtual column + * that refers to exactly one table alias. If not, return false. + */ + private boolean hasAtleastOneSubExprWithPartColOrVirtualColWithOneTableAlias(ExprNodeDesc en) { + if (en == null || en.getChildren() == null) { + return false; + } + for (ExprNodeDesc cn : en.getChildren()) { + if (exprContainsOnlyPartitionColOrVirtualColOrConstants(cn) && getTableAlias(cn) != null) { + return true; + } + } + return false; + } + + + /** + * Check if the expression node satisfies the following: + * All subexpressions contain constants or partition/virtual columns coming from the + * same table. + * @param en Expression Node Descriptor + * @return true/false based on the condition specified in the above description. + */ + private boolean hasAllSubExprWithConstOrPartColOrVirtualColWithOneTableAlias(ExprNodeDesc en) { + if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) { + return false; + } + + Set<String> s = new HashSet<String>(); + Set<ExprNodeDesc> visited = new HashSet<ExprNodeDesc>(); + + return getTableAliasHelper(en, s, visited); + } + + + /** + * Return the expression node descriptor if the input expression node is a GenericUDFIn. + * Else, return null. + * @param en Expression Node Descriptor + * @return The expression node descriptor if the input expression node represents an IN clause. + * Else, return null.
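To make the eligibility contract described in the javadoc above concrete, here is a stand-alone sketch (illustrative only, not part of the patch; Leaf and separableAlias are invented names) of the rule the helpers enforce: a struct field qualifies for separation only when every column leaf under it is a partition or virtual column and all column leaves resolve to a single table alias.

import java.util.Arrays;
import java.util.List;

final class FieldEligibilityModel {
  // A leaf is either a constant, or a column with a table alias and a partition/virtual flag.
  static final class Leaf {
    final boolean isColumn;
    final boolean isPartitionOrVirtual;
    final String tableAlias;
    Leaf(boolean isColumn, boolean isPartitionOrVirtual, String tableAlias) {
      this.isColumn = isColumn;
      this.isPartitionOrVirtual = isPartitionOrVirtual;
      this.tableAlias = tableAlias;
    }
  }

  // Returns the single table alias when the leaves qualify for separation, otherwise null.
  // Mirrors the intent of exprContainsOnlyPartitionColOrVirtualColOrConstants + getTableAliasHelper.
  static String separableAlias(List<Leaf> leaves) {
    String alias = null;
    for (Leaf leaf : leaves) {
      if (!leaf.isColumn) {
        continue;                      // constants never disqualify a field
      }
      if (!leaf.isPartitionOrVirtual) {
        return null;                   // a non-partition column disqualifies the field
      }
      if (alias == null) {
        alias = leaf.tableAlias;       // first table alias seen
      } else if (!alias.equals(leaf.tableAlias)) {
        return null;                   // more than one table alias referenced
      }
    }
    return alias;
  }

  public static void main(String[] args) {
    // t1.ds combined with a constant: separable under alias "t1"
    System.out.println(separableAlias(Arrays.asList(
        new Leaf(true, true, "t1"), new Leaf(false, false, null))));   // prints t1
    // t1.ds combined with t2.ds: two aliases, not separable
    System.out.println(separableAlias(Arrays.asList(
        new Leaf(true, true, "t1"), new Leaf(true, true, "t2"))));     // prints null
  }
}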
+ */ + private ExprNodeGenericFuncDesc getInExprNode(ExprNodeDesc en) { + if (en == null) { + return null; + } + + if (en instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc)(en)).getGenericUDF() + instanceof GenericUDFIn) { + return (ExprNodeGenericFuncDesc) en; + } + return null; + } + + + /** + * Helper used by getTableAlias. + * @param en Expression Node Descriptor + * @param s Set of the table aliases associated with the current expression node. + * @param visited Visited expression node set. + * @return true if en has at most one table associated with it, else return false. + */ + private boolean getTableAliasHelper(ExprNodeDesc en, Set<String> s, Set<ExprNodeDesc> visited) { + visited.add(en); + + // If the current expression node is a column, check whether its table alias is already + // in the result set s. If s already contains a different table alias, the expression + // references more than one table and we return false. + if (en instanceof ExprNodeColumnDesc) { + if (s.size() > 0 && + !s.contains(((ExprNodeColumnDesc)en).getTabAlias())) { + return false; + } + if (s.size() == 0) { + s.add(((ExprNodeColumnDesc)en).getTabAlias()); + } + return true; + } + if (en.getChildren() == null) { + return true; + } + + // Iterate through the children in a DFS manner to see if more than one table alias is + // referenced by the current expression node. + for (ExprNodeDesc cn : en.getChildren()) { + if (visited.contains(cn)) { + continue; + } + if (cn instanceof ExprNodeColumnDesc) { + s.add(((ExprNodeColumnDesc) cn).getTabAlias()); + } else if (!(cn instanceof ExprNodeConstantDesc)) { + if (!getTableAliasHelper(cn, s, visited)) { + return false; + } + } + } + return true; + } + + + /** + * If the given expression has just a single table associated with it, + * return the table alias associated with it. Else, return null. + * @param en Expression Node Descriptor + * @return The table alias associated with the expression if there is a single table + * reference. Else, return null. + */ + private String getTableAlias(ExprNodeDesc en) { + Set<String> s = new HashSet<String>(); + Set<ExprNodeDesc> visited = new HashSet<ExprNodeDesc>(); + boolean singleTableAlias = getTableAliasHelper(en, s, visited); + + if (!singleTableAlias || s.size() == 0) { + return null; + } + StringBuilder ans = new StringBuilder(); + for (String st : s) { + ans.append(st); + } + return ans.toString(); + } + + + /** + * The main process method for StructInExprProcessor to generate additional predicates + * containing only partition columns. + */ + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + ExprNodeGenericFuncDesc fd = getInExprNode((ExprNodeDesc)nd); + + /***************************************************************************************\ + BEGIN : Early terminations for Partition Column Separator + /***************************************************************************************/ + // 1. If the input node is not an IN operator, we bail out. + if (fd == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + nd + ", it is not an IN operator"); + } + return null; + } + + // 2.
Check if the input is an IN operator with struct children + List children = fd.getChildren(); + if (!(children.get(0) instanceof ExprNodeGenericFuncDesc) || + (!(((ExprNodeGenericFuncDesc) children.get(0)).getGenericUDF() + instanceof GenericUDFStruct))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + ", children size " + + children.size() + ", child expression : " + children.get(0).getExprString()); + } + return null; + } + + // 3. See if the IN (STRUCT(EXP1, EXP2,..) has atleast one expression with partition + // column with single table alias. If not bail out. + // We might have expressions containing only partitioning columns, say, T1.A + T2.B + // where T1.A and T2.B are both partitioning columns. + // However, these expressions should not be considered as valid expressions for separation. + if (!hasAtleastOneSubExprWithPartColOrVirtualColWithOneTableAlias(children.get(0))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + + ", there are no expression containing partition columns in struct fields"); + } + return null; + } + + // 4. See if all the field expressions of the left hand side of IN are expressions + // containing constants or only partition columns coming from same table. + // If so, we need not perform this optimization and we should bail out. + if (hasAllSubExprWithConstOrPartColOrVirtualColWithOneTableAlias(children.get(0))) { + if (LOG.isDebugEnabled()) { + LOG.debug("Partition columns not separated for " + fd + + ", all fields are expressions containing constants or only partition columns" + + "coming from same table"); + } + return null; + } + + /***************************************************************************************\ + END : Early terminations for Partition Column Separator + /***************************************************************************************/ + + + /***************************************************************************************\ + BEGIN : Actual processing of the IN (STRUCT(..)) expression. + /***************************************************************************************/ + Map tableAliasToInfo = + new HashMap<>(); + ExprNodeGenericFuncDesc originalStructDesc = ((ExprNodeGenericFuncDesc) children.get(0)); + List originalDescChildren = originalStructDesc.getChildren(); + /** + * PASS 1 : Iterate through the original IN(STRUCT(..)) and populate the tableAlias to + * predicate information inside tableAliasToInfo. + */ + for (int i = 0; i < originalDescChildren.size(); i++) { + ExprNodeDesc en = originalDescChildren.get(i); + String tabAlias = null; + + // If the current expression node does not have a virtual/partition column or + // single table alias reference, ignore it and move to the next expression node. + if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(en) || + (tabAlias = getTableAlias(en)) == null) { + continue; + } + + TableInfo currTableInfo = null; + + // If the table alias to information map already contains the current table, + // use the existing TableInfo object. Else, create a new one. + if (tableAliasToInfo.containsKey(tabAlias)) { + currTableInfo = tableAliasToInfo.get(tabAlias); + } else { + currTableInfo = new TableInfo(); + } + currTableInfo.exprNodeLHSDescriptor.add(en); + + // Iterate through the children nodes of the IN clauses starting from index 1, + // which corresponds to the right hand side of the IN list. 
+ // Insert the value corresponding to the current expression in currExprNodeInfo.exprNodeValues. + for (int j = 1; j < children.size(); j++) { + ExprNodeDesc currChildStructExpr = children.get(j); + ExprNodeDesc newConstStructElement = null; + + // 1. Get the constant value associated with the current element in the struct. + // If the current child struct expression is a constant struct. + if (currChildStructExpr instanceof ExprNodeConstantDesc) { + List cnCols = (List)(((ExprNodeConstantDesc) (children.get(j))).getValue()); + newConstStructElement = new ExprNodeConstantDesc(cnCols.get(i)); + } else { + // This better be a generic struct with constant values as the children. + List cnChildren = ((ExprNodeGenericFuncDesc) children.get(j)).getChildren(); + newConstStructElement = new ExprNodeConstantDesc( + (((ExprNodeConstantDesc) (cnChildren.get(i))).getValue())); + } + + // 2. Insert the current constant value into exprNodeStructs list. + // If there is no struct corresponding to the current element, create a new one, insert + // the constant value into it and add the struct as part of exprNodeStructs. + if (currTableInfo.exprNodeRHSStructs.size() < j) { + List newConstStructList = new ArrayList(); + newConstStructList.add(newConstStructElement); + currTableInfo.exprNodeRHSStructs.add(newConstStructList); + } else { + // We already have a struct node for the current index. Insert the constant value + // into the corresponding struct node. + currTableInfo.exprNodeRHSStructs.get(j-1).add(newConstStructElement); + } + } + + // Insert the current table alias entry into the map if not already present in tableAliasToInfo. + if (!tableAliasToInfo.containsKey(tabAlias)) { + tableAliasToInfo.put(tabAlias, currTableInfo); + } + } + + /** + * PASS 2 : Iterate through the tableAliasToInfo populated via PASS 1 + * to generate the new expression. + */ + // subExpr is the list containing generated IN clauses as a result of this optimization. + final List subExpr = + new ArrayList(originalDescChildren.size()+1); + + for (Entry entry : + tableAliasToInfo.entrySet()) { + TableInfo currTableInfo = entry.getValue(); + List> currConstStructList = currTableInfo.exprNodeRHSStructs; + + // IN(STRUCT(..)..) ExprNodeDesc list for the current table alias. + List currInStructExprList = new ArrayList(); + + // Add the left hand side of the IN clause which contains the struct definition. + currInStructExprList.add(ExprNodeGenericFuncDesc.newInstance + (FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), + STRUCT_UDF, + currTableInfo.exprNodeLHSDescriptor)); + + // Generate the right hand side of the IN clause + for (int i = 0; i < currConstStructList.size(); i++) { + List currConstStruct = currConstStructList.get(i); + + // Add the current constant struct to the right hand side of the IN clause. + currInStructExprList.add(ExprNodeGenericFuncDesc.newInstance + (FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(), + STRUCT_UDF, + currConstStruct)); + } + + // Add the newly generated IN clause to subExpr. + subExpr.add(new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, FunctionRegistry. + getFunctionInfo(IN_UDF).getGenericUDF(), currInStructExprList)); + } + /***************************************************************************************\ + END : Actual processing of the IN (STRUCT(..)) expression. 
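The two passes above can be pictured with a short stand-alone sketch (illustrative only; plain strings stand in for ExprNodeDesc objects, and the field/alias/tuple arrays are assumed inputs): PASS 1 groups the struct field positions by table alias, PASS 2 projects the left-hand struct and every constant tuple onto each group, and the generated IN clauses are then AND-ed together and with the original predicate.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class StructInSeparationModel {
  public static void main(String[] args) {
    String[] fields = {"t1.ds", "t1.key", "t2.ds"};
    String[] fieldAliases = {"t1", "t1", "t2"};        // null would mean "not separable"
    String[][] tuples = {{"'2000-04-08'", "1", "'2000-04-09'"},
                         {"'2000-04-09'", "2", "'2000-04-08'"}};

    // PASS 1: per table alias, collect the LHS field positions.
    Map<String, List<Integer>> positionsByAlias = new LinkedHashMap<>();
    for (int i = 0; i < fields.length; i++) {
      if (fieldAliases[i] == null) {
        continue;                  // non-separable field: only kept in the original IN clause
      }
      positionsByAlias.computeIfAbsent(fieldAliases[i], a -> new ArrayList<>()).add(i);
    }

    // PASS 2: per alias, project the LHS struct and every constant tuple onto those positions.
    List<String> generated = new ArrayList<>();
    for (Map.Entry<String, List<Integer>> e : positionsByAlias.entrySet()) {
      List<Integer> pos = e.getValue();
      StringBuilder in = new StringBuilder("struct(");
      for (int k = 0; k < pos.size(); k++) {
        in.append(k == 0 ? "" : ", ").append(fields[pos.get(k)]);
      }
      in.append(") IN (");
      for (int t = 0; t < tuples.length; t++) {
        in.append(t == 0 ? "" : ", ").append("struct(");
        for (int k = 0; k < pos.size(); k++) {
          in.append(k == 0 ? "" : ", ").append(tuples[t][pos.get(k)]);
        }
        in.append(")");
      }
      in.append(")");
      generated.add(in.toString());
    }

    // Prints:
    // struct(t1.ds, t1.key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2)) AND
    // struct(t2.ds) IN (struct('2000-04-09'), struct('2000-04-08'))
    System.out.println(String.join(" AND ", generated));
  }
}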
+ /***************************************************************************************/ + + // If there is only 1 table ALIAS, return it + if (subExpr.size() == 1) { + // Return the new expression containing only partition columns + return subExpr.get(0); + } + // Return the new expression containing only partition columns + // after concatenating them with AND operator + return new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java index 4799b4d..a1a49cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java @@ -18,14 +18,10 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.Stack; import org.apache.calcite.util.Pair; @@ -50,18 +46,15 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.ListMultimap; /** @@ -78,48 +71,14 @@ GenericUDFIn.class.getAnnotation(Description.class).name(); private static final String STRUCT_UDF = GenericUDFStruct.class.getAnnotation(Description.class).name(); - private static final String AND_UDF = - GenericUDFOPAnd.class.getAnnotation(Description.class).name(); - // these are closure-bound for all the walkers in context public final int minOrExpr; - public final boolean extract; - public final boolean testMode; /* * Pass in configs and pre-create a parse context */ - public PointLookupOptimizer(final int min, final boolean extract, final boolean testMode) { + public PointLookupOptimizer(final int min) { this.minOrExpr = min; - this.extract = extract; - this.testMode = testMode; - } - - // Hash Set iteration isn't ordered, but force string sorted order - // to get a consistent test run. 
- private Collection sortForTests( - Set valuesExpr) { - if (!testMode) { - // normal case - sorting is wasted for an IN() - return valuesExpr; - } - final Collection sortedValues; - - sortedValues = ImmutableSortedSet.copyOf( - new Comparator() { - @Override - public int compare(ExprNodeDescEqualityWrapper w1, - ExprNodeDescEqualityWrapper w2) { - // fail if you find nulls (this is a test-code section) - if (w1.equals(w2)) { - return 0; - } - return w1.getExprNodeDesc().getExprString() - .compareTo(w2.getExprNodeDesc().getExprString()); - } - }, valuesExpr); - - return sortedValues; } @Override @@ -152,9 +111,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (LOG.isDebugEnabled()) { LOG.debug("Generated new predicate with IN clause: " + newPredicate); } - if (!extract) { - filterOp.getConf().setOrigPredicate(predicate); - } filterOp.getConf().setPredicate(newPredicate); } @@ -326,50 +282,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, newPredicate = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, FunctionRegistry.getFunctionInfo(IN_UDF).getGenericUDF(), newChildren); - if (extract && columns.size() > 1) { - final List subExpr = new ArrayList(columns.size()+1); - - // extract pre-conditions for the tuple expressions - // (a,b) IN ((1,2),(2,3)) -> - // ((a) IN (1,2) and b in (2,3)) and (a,b) IN ((1,2),(2,3)) - - for (String keyString : columnConstantsMap.keySet()) { - final Set valuesExpr = - new HashSet(children.size()); - final List> partial = - columnConstantsMap.get(keyString); - for (int i = 0; i < children.size(); i++) { - Pair columnConstant = partial - .get(i); - valuesExpr - .add(new ExprNodeDescEqualityWrapper(columnConstant.right)); - } - ExprNodeColumnDesc lookupCol = partial.get(0).left; - // generate a partial IN clause, if the column is a partition column - if (lookupCol.getIsPartitionColOrVirtualCol() - || valuesExpr.size() < children.size()) { - // optimize only nDV reductions - final List inExpr = new ArrayList(); - inExpr.add(lookupCol); - for (ExprNodeDescEqualityWrapper value : sortForTests(valuesExpr)) { - inExpr.add(value.getExprNodeDesc()); - } - subExpr.add(new ExprNodeGenericFuncDesc( - TypeInfoFactory.booleanTypeInfo, FunctionRegistry - .getFunctionInfo(IN_UDF).getGenericUDF(), inExpr)); - } - } - // loop complete, inspect the sub expressions generated - if (subExpr.size() > 0) { - // add the newPredicate to the end & produce an AND clause - subExpr.add(newPredicate); - newPredicate = new ExprNodeGenericFuncDesc( - TypeInfoFactory.booleanTypeInfo, FunctionRegistry - .getFunctionInfo(AND_UDF).getGenericUDF(), subExpr); - } - // else, newPredicate is unmodified - } - return newPredicate; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index 7cdc730..2ab1575 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -48,9 +48,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** @@ -364,6 +367,36 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return getResultWrapFromResults(results, fd, newNodeOutputs); } return new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, newNodeOutputs)); + } else if (fd.getGenericUDF() instanceof GenericUDFIn) { + List children = fd.getChildren(); + boolean removePredElem = false; + ExprNodeDesc lhs = children.get(0); + + if (lhs instanceof ExprNodeGenericFuncDesc) { + // Make sure that the generic udf is deterministic + if (FunctionRegistry.isDeterministic(((ExprNodeGenericFuncDesc) lhs) + .getGenericUDF())) { + boolean hasOnlyPartCols = true; + for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) lhs).getChildren()) { + // Check if the current field expression contains only + // partition column or a virtual column or constants. + // If yes, this filter predicate is a candidate for this optimization. + if (!(ed instanceof ExprNodeColumnDesc && + ((ExprNodeColumnDesc)ed).getIsPartitionColOrVirtualCol())) { + hasOnlyPartCols = false; + break; + } + } + removePredElem = hasOnlyPartCols; + } + } + + // If removePredElem is set to true, return true as this is a potential candidate + // for partition condition remover. Else, set the WalkState for this node to unknown. + return removePredElem ? + new NodeInfoWrapper(WalkState.TRUE, null, + new ExprNodeConstantDesc(fd.getTypeInfo(), Boolean.TRUE)) : + new NodeInfoWrapper(WalkState.UNKNOWN, null, getOutExpr(fd, nodeOutputs)) ; } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) { // If it's a non-deterministic UDF, set unknown to true return new NodeInfoWrapper(WalkState.UNKNOWN, null, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java index 7262164..fd51628 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java @@ -55,8 +55,7 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop, TableScanOperator top) throws SemanticException, UDFArgumentException { OpWalkerCtx owc = (OpWalkerCtx) procCtx; // Otherwise this is not a sampling predicate and we need to - ExprNodeDesc predicate = fop.getConf().getOrigPredicate(); - predicate = predicate == null ? 
fop.getConf().getPredicate() : predicate; + ExprNodeDesc predicate = fop.getConf().getPredicate(); String alias = top.getConf().getAlias(); // Generate the partition pruning predicate diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java index 6a31689..ccc4bb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java @@ -79,7 +79,6 @@ public String toString() { private static final long serialVersionUID = 1L; private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate; - private transient ExprNodeDesc origPredicate; private boolean isSamplingPred; private transient SampleDesc sampleDescr; //Is this a filter that should perform a comparison for sorted searches @@ -151,14 +150,6 @@ public void setSortedFilter(boolean isSortedFilter) { this.isSortedFilter = isSortedFilter; } - public void setOrigPredicate(ExprNodeDesc origPredicate) { - this.origPredicate = origPredicate; - } - - public ExprNodeDesc getOrigPredicate() { - return origPredicate; - } - /** * Some filters are generated or implied, which means it is not in the query. * It is added by the analyzer. For example, when we do an inner join, we add diff --git a/ql/src/test/queries/clientpositive/pcs.q b/ql/src/test/queries/clientpositive/pcs.q new file mode 100644 index 0000000..4b35a4d --- /dev/null +++ b/ql/src/test/queries/clientpositive/pcs.q @@ -0,0 +1,66 @@ +drop table pcs_t1; +drop table pcs_t2; + +create table pcs_t1 (key int, value string) partitioned by (ds string); +insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key; +insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key; +insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key; + +analyze table pcs_t1 partition(ds) compute statistics; +analyze table pcs_t1 partition(ds) compute statistics for columns; + +set hive.optimize.point.lookup = true; +set hive.optimize.point.lookup.min = 1; + +explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds; +select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds; + +set hive.optimize.point.lookup = false; +set hive.optimize.partition.columns.separate=true; +set hive.optimize.ppd=true; + +explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); +select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); + +explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)); +select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)); + +explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')); + +select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')); + +explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)); +select ds from pcs_t1 where struct(ds, key+key) in 
(struct('2000-04-08',1), struct('2000-04-09',2)); + +explain select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); +select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)); + +EXPLAIN EXTENDED +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds; + +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds; + +explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)); +select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)); + +explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3)); + +explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)); +select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)); + +explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); +select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/pointlookup.q b/ql/src/test/queries/clientpositive/pointlookup.q index 1aef2ef..c460f39 100644 --- a/ql/src/test/queries/clientpositive/pointlookup.q +++ b/ql/src/test/queries/clientpositive/pointlookup.q @@ -18,8 +18,7 @@ WHERE set hive.optimize.point.lookup.min=3; -set hive.optimize.point.lookup.extract=false; - +set hive.optimize.partition.columns.separate=false; explain SELECT key FROM src @@ -38,8 +37,7 @@ WHERE AND value = '3')) ; -set hive.optimize.point.lookup.extract=true; - +set hive.optimize.partition.columns.separate=true; explain SELECT key FROM src diff --git a/ql/src/test/queries/clientpositive/pointlookup2.q b/ql/src/test/queries/clientpositive/pointlookup2.q index 31bebbb..94e99fb 100644 --- a/ql/src/test/queries/clientpositive/pointlookup2.q +++ b/ql/src/test/queries/clientpositive/pointlookup2.q @@ -14,7 +14,7 @@ from pcr_t1 insert overwrite table pcr_t2 select ds, key, value where ds='2000-04-08' and key=2; set hive.optimize.point.lookup.min=2; -set hive.optimize.point.lookup.extract=true; +set hive.optimize.partition.columns.separate=true; explain extended select key, value, ds diff --git a/ql/src/test/queries/clientpositive/pointlookup3.q b/ql/src/test/queries/clientpositive/pointlookup3.q index 3daa94b..79e7348 100644 --- a/ql/src/test/queries/clientpositive/pointlookup3.q +++ b/ql/src/test/queries/clientpositive/pointlookup3.q @@ -6,7 +6,7 @@ insert overwrite table pcr_t1 partition (ds1='2000-04-09', ds2='2001-04-09') sel insert overwrite table pcr_t1 partition (ds1='2000-04-10', ds2='2001-04-10') select * from src where key < 20 order by key; set 
hive.optimize.point.lookup.min=2; -set hive.optimize.point.lookup.extract=true; +set hive.optimize.partition.columns.separate=true; explain extended select key, value, ds1, ds2 diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out index eca29df..ddb05e2 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid.q.out @@ -153,7 +153,7 @@ STAGE PLANS: TableScan alias: acid Filter Operator - predicate: (key = 'foo') (type: boolean) + predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) Select Operator expressions: ROW__ID (type: struct), ds (type: string) outputColumnNames: _col0, _col3 @@ -390,7 +390,7 @@ STAGE PLANS: TableScan alias: acid Filter Operator - predicate: (key = 'foo') (type: boolean) + predicate: ((key = 'foo') and (ds) IN ('2008-04-08')) (type: boolean) Select Operator expressions: ROW__ID (type: struct), ds (type: string) outputColumnNames: _col0, _col3 diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 4320f01..7b428bc 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -1275,21 +1275,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: llap @@ -4076,21 +4061,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: uber @@ -5229,21 +5199,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 - Select Operator - expressions: UDFToDouble(hr) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart_orc - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: uber diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index e30465d..e9192a3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -1275,21 +1275,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: llap @@ -4076,21 +4061,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: vectorized, uber @@ -5229,21 +5199,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 - Select Operator - expressions: UDFToDouble(hr) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart_orc - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Execution mode: llap Reducer 2 Execution mode: uber diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out new file mode 100644 index 0000000..5cf0dff --- /dev/null +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -0,0 +1,2249 @@ +PREHOOK: query: drop table pcs_t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop 
table pcs_t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table pcs_t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcs_t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcs_t1 +POSTHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcs_t1 +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table pcs_t1 partition(ds) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +PREHOOK: Output: default@pcs_t1 +PREHOOK: Output: default@pcs_t1@ds=2000-04-08 +PREHOOK: Output: default@pcs_t1@ds=2000-04-09 +PREHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +POSTHOOK: Output: default@pcs_t1 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-10 +PREHOOK: query: 
analyze table pcs_t1 partition(ds) compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +PREHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 1 + and + = + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: 
default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2 val_2 2000-04-09 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + 2 + TOK_FUNCTION + struct + '2000-04-08' + 3 + TOK_FUNCTION + struct + '2000-04-09' + 4 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + 
Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + 2))) IN (const struct('2000-04-08',3), const struct('2000-04-09',4)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +PREHOOK: query: explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcs_t1 + a + TOK_TABREF + TOK_TABNAME + pcs_t1 + b + = + . + TOK_TABLE_OR_COL + a + ds + . + TOK_TABLE_OR_COL + b + ds + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + pcs_t1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + a + ds + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + ds + TOK_FUNCTION + struct + '2000-04-08' + 1 + '2000-04-09' + TOK_FUNCTION + struct + '2000-04-09' + 2 + '2000-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int) + auto parallelism: false + TableScan + alias: b + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Reduce Output Operator + key expressions: ds (type: string) + sort order: + + Map-reduce partition columns: ds (type: string) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [a, b] + /pcs_t1/ds=2000-04-09 [a, b] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 ds (type: string) + 1 ds (type: string) + outputColumnNames: _col0, _col2, _col6, _col8 + Statistics: Num rows: 44 Data size: 352 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (struct(_col2,_col0,_col8)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean) + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col6 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + 
ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + key))) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 
+ columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +PREHOOK: query: explain select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col0 + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: lag_window_0 + arguments: _col0 + name: lag + window function: GenericUDAFLagEvaluator + window frame: 
PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lag_window_0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select lag(key) over (partition by key) as c1 +from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +NULL +PREHOOK: query: EXPLAIN EXTENDED +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_SUBQUERY + TOK_UNIONALL + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + X + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + X + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + X + ds + . + TOK_TABLE_OR_COL + X + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + Y + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_TABNAME + Y + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + Y + ds + . + TOK_TABLE_OR_COL + Y + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + A + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_ALLCOLREF + TOK_WHERE + = + . + TOK_TABLE_OR_COL + A + ds + '2008-04-08' + TOK_SORTBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + key + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + A + value + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + A + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + sort order: +++ + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + auto parallelism: false + TableScan + alias: y + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) and (ds = '2008-04-08')) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + sort order: +++ + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + auto parallelism: false + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), '2008-04-08' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM ( + SELECT X.* 
FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT X.* FROM pcs_t1 X WHERE struct(X.ds, X.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) + UNION ALL + SELECT Y.* FROM pcs_t1 Y WHERE struct(Y.ds, Y.key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +) A +WHERE A.ds = '2008-04-08' +SORT BY A.key, A.value, A.ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +#### A masked pattern was here #### +PREHOOK: query: explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_FUNCTION + when + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + 10 + 20 + TOK_FUNCTION + struct + 10 + TOK_FUNCTION + struct + 11 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (const struct(10)) IN (const struct(10), const struct(11)) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: 
select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(case when ds='2000-04-08' then 10 else 20 end) in (struct(10),struct(11)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +#### A masked pattern was here #### +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +2000-04-08 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key, rand(100)) in (struct('2000-04-08',1,0.2), struct('2000-04-09',2,0.3)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + TOK_TABLE_OR_COL + key + TOK_FUNCTION + rand + 100 + TOK_FUNCTION + struct + '2000-04-08' + 1 + 0.2 + TOK_FUNCTION + struct + '2000-04-09' + 2 + 0.3 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,key,rand(100))) IN (const struct('2000-04-08',1,0.2), const struct('2000-04-09',2,0.3)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + 
partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + or + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 2 + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + true + 2 + TOK_FUNCTION + struct + false + 3 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
+ properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) (type: boolean) + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +2000-04-08 +2000-04-09 +2000-04-10 +PREHOOK: query: explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + = + TOK_TABLE_OR_COL + key + 3 + and + TOK_FUNCTION + in + TOK_FUNCTION + struct + or + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 2 + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + true + 2 + TOK_FUNCTION + struct + false + 3 + > + + + TOK_TABLE_OR_COL + key + 5 + 0 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 60 Data size: 480 Basic 
stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = 3) or ((struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) and ((key + 5) > 0))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +2000-04-08 +2000-04-09 +2000-04-10 diff --git a/ql/src/test/results/clientpositive/pointlookup.q.out b/ql/src/test/results/clientpositive/pointlookup.q.out index 7e19be4..a99b388 100644 --- a/ql/src/test/results/clientpositive/pointlookup.q.out +++ b/ql/src/test/results/clientpositive/pointlookup.q.out @@ -176,15 +176,15 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value) IN ('1', '3', '5', '6', '8') and (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3'))) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (const struct('0','8'), const struct('1','5'), const struct('2','6'), const struct('3','8'), const struct('4','1'), const struct('5','6'), const struct('6','1'), const struct('7','1'), const struct('8','1'), const struct('9','1'), const struct('10','3')) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out index 7e28c77..792ccaf 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out @@ -1236,21 +1236,6 @@ STAGE PLANS: sort order: + Map-reduce 
partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -3944,21 +3929,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: Group By Operator @@ -5063,21 +5033,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 - Select Operator - expressions: UDFToDouble(hr) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart_orc - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index 1103e80..da2033b 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -1251,21 +1251,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Target column: 
hr - Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -3995,21 +3980,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(UDFToInteger((hr / 2))) (type: double) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(UDFToInteger((hr / 2))) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -5131,21 +5101,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE Target column: ds Target Vertex: Map 1 - Select Operator - expressions: UDFToDouble(hr) (type: double) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: double) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Dynamic Partitioning Event Operator - Target Input: srcpart_orc - Partition key expr: UDFToDouble(hr) - Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE - Target column: hr - Target Vertex: Map 1 Reducer 2 Reduce Operator Tree: Group By Operator