diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 14f362f..a80afa7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -88,6 +88,7 @@ public void initialize(HiveConf hiveConf) {
     }
     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
+      transformations.add(new PartitionColumnsSeparator());
       transformations.add(new PartitionPruner());
       transformations.add(new PartitionConditionRemover());
       if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java
new file mode 100644
index 0000000..e84b37c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java
@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ForwardWalker;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.TypeRule;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
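+/*
+ * Example (based on the pcs.q test added in this patch): pcs_t1 is partitioned by ds,
+ * so a filter such as
+ *
+ *   struct(ds, key) IN (struct('2000-04-08', 1), struct('2000-04-09', 2))
+ *
+ * mixes the partition column ds with the non-partition column key and cannot be used by
+ * the partition pruner as-is. This transform derives a pruner-only predicate of roughly
+ * the form
+ *
+ *   struct(ds) IN (struct('2000-04-08'), struct('2000-04-09'))
+ *
+ * and attaches it to the FilterOperator via setPartitionpruningPredicate(); the original
+ * predicate is still evaluated at run time, so query results are unchanged.
+ */
+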
+/**
+ * This optimization takes a Filter expression and, if its predicate contains an IN
+ * operator whose children are constant structs or structs containing constant fields,
+ * tries to generate an additional predicate with IN clauses that reference only
+ * partition columns. That predicate is in turn used by the partition pruner, which
+ * cannot use the original IN(STRUCT(..)..) predicate directly because it also
+ * references non-partition columns.
+ */
+public class PartitionColumnsSeparator implements Transform {
+
+  private static final Log LOG = LogFactory.getLog(PartitionColumnsSeparator.class);
+  private static final String IN_UDF =
+    GenericUDFIn.class.getAnnotation(Description.class).name();
+  private static final String STRUCT_UDF =
+    GenericUDFStruct.class.getAnnotation(Description.class).name();
+  private static final String AND_UDF =
+    GenericUDFOPAnd.class.getAnnotation(Description.class).name();
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    // 1. Trigger transformation
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"), new StructInTransformer());
+
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
+    GraphWalker ogw = new ForwardWalker(disp);
+
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    return pctx;
+  }
+
+  private class StructInTransformer implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator filterOp = (FilterOperator) nd;
+      ExprNodeDesc predicate = filterOp.getConf().getPredicate();
+
+      // Generate the partition pruning predicate as separate per-table IN clauses
+      // containing only the partition (or virtual) columns.
+      ExprNodeDesc newPredicate = generateInClauses(predicate);
+      if (newPredicate != null) {
+        // Attach the new predicate to the current FilterOperator for the partition pruner to use.
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Generated new predicate with IN clause: " + newPredicate);
+        }
+        filterOp.getConf().setPartitionpruningPredicate(newPredicate);
+      }
+
+      return null;
+    }
+
+    private ExprNodeDesc generateInClauses(ExprNodeDesc predicate) throws SemanticException {
+      Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+      exprRules.put(new TypeRule(ExprNodeGenericFuncDesc.class), new StructInExprProcessor());
+
+      // The dispatcher fires the processor corresponding to the closest matching
+      // rule and passes the context along
+      Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, null);
+      GraphWalker egw = new PreOrderOnceWalker(disp);
+
+      List<Node> startNodes = new ArrayList<Node>();
+      startNodes.add(predicate);
+
+      HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+      egw.startWalking(startNodes, outputMap);
+      return (ExprNodeDesc) outputMap.get(predicate);
+    }
+  }
+
+  private class StructInExprProcessor implements NodeProcessor {
+
+    // Mapping from an expression node to whether it references only
+    // partition columns, virtual columns, or constants.
+    private Map<ExprNodeDesc, Boolean> exprNodeToPartOrVirtualColExpr =
+        new IdentityHashMap<ExprNodeDesc, Boolean>();
+
+    private boolean exprContainsOnlyPartitionColOrVirtualColOrConstants(ExprNodeDesc en) {
+      if (en == null) {
+        return true;
+      }
+      if (exprNodeToPartOrVirtualColExpr.containsKey(en)) {
+        return exprNodeToPartOrVirtualColExpr.get(en);
+      }
+      if (en instanceof ExprNodeColumnDesc) {
+        boolean ret = ((ExprNodeColumnDesc) en).getIsPartitionColOrVirtualCol();
+        exprNodeToPartOrVirtualColExpr.put(en, ret);
+        return ret;
+      }
+      if (en instanceof ExprNodeConstantDesc || en.getChildren() == null) {
+        exprNodeToPartOrVirtualColExpr.put(en, true);
+        return true;
+      }
+      for (ExprNodeDesc cn : en.getChildren()) {
+        if (!exprContainsOnlyPartitionColOrVirtualColOrConstants(cn)) {
+          exprNodeToPartOrVirtualColExpr.put(en, false);
+          return false;
+        }
+      }
+      exprNodeToPartOrVirtualColExpr.put(en, true);
+      return true;
+    }
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+
+      /***************************************************************************************\
+        BEGIN : Early terminations for Partition Column Separator
+      /***************************************************************************************/
+      // 1. If it is not an IN operator, we bail out.
+      if (!(fd.getGenericUDF() instanceof GenericUDFIn)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd + ", not an IN operator");
+        }
+        return null;
+      }
+
+      // 2. It must be an IN operator whose first child is a struct.
+      List<ExprNodeDesc> children = fd.getChildren();
+      if (children.size() < 2 || !(children.get(0) instanceof ExprNodeGenericFuncDesc) ||
+          (!(((ExprNodeGenericFuncDesc) children.get(0)).getGenericUDF()
+          instanceof GenericUDFStruct))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd + ", children size " +
+              children.size() + ", child expression : " + children.get(0).getExprString());
+        }
+        return null;
+      }
+
+      // 3. See if there are partition columns in the struct; if not, bail out.
+      boolean isValidOptimization = false;
+      for (ExprNodeDesc ed : ((ExprNodeGenericFuncDesc) children.get(0)).getChildren()) {
+        // Check if the current field expression contains only
+        // partition columns, virtual columns or constants.
+        // If yes, this filter predicate is a candidate for this optimization.
+        if (!(ed instanceof ExprNodeConstantDesc) &&
+            exprContainsOnlyPartitionColOrVirtualColOrConstants(ed)) {
+          isValidOptimization = true;
+          break;
+        }
+      }
+      if (!isValidOptimization) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd +
+              ", there are no partition columns in struct fields");
+        }
+        return null;
+      }
+      /***************************************************************************************\
+        END : Early terminations for Partition Column Separator
+      /***************************************************************************************/
+
+      Map<String, List<ExprNodeDesc>> tableAliasToExprNodeDesc =
+          new HashMap<String, List<ExprNodeDesc>>();
+      Map<String, List<String>> tableAliasToPartName = new HashMap<String, List<String>>();
+      Map<String, List<TypeInfo>> tableAliasToTypeInfo = new HashMap<String, List<TypeInfo>>();
+      ExprNodeGenericFuncDesc originalStructDesc = ((ExprNodeGenericFuncDesc) children.get(0));
+      List<ExprNodeDesc> originalDescChildren = originalStructDesc.getChildren();
+
+      // Set the first row of the IN clauses, which is the struct field metadata.
+      for (ExprNodeDesc en : originalDescChildren) {
+        if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+          List<ExprNodeDesc> exprNodeDescList;
+          List<String> partNameList;
+          List<TypeInfo> typeInfoList;
+          String tabAlias = en instanceof ExprNodeColumnDesc ? ((ExprNodeColumnDesc) en).getTabAlias() :
+              en.getExprString();
+
+          if (!tableAliasToExprNodeDesc.containsKey(tabAlias)) {
+            exprNodeDescList = new ArrayList<ExprNodeDesc>();
+            exprNodeDescList.add(en);
+            tableAliasToExprNodeDesc.put(tabAlias, exprNodeDescList);
+
+            partNameList = new ArrayList<String>();
+            partNameList.add(en.getName());
+            tableAliasToPartName.put(tabAlias, partNameList);
+
+            typeInfoList = new ArrayList<TypeInfo>();
+            typeInfoList.add(en.getTypeInfo());
+            tableAliasToTypeInfo.put(tabAlias, typeInfoList);
+          } else {
+            exprNodeDescList = tableAliasToExprNodeDesc.get(tabAlias);
+            exprNodeDescList.add(en);
+
+            partNameList = tableAliasToPartName.get(tabAlias);
+            partNameList.add(en.getName());
+
+            typeInfoList = tableAliasToTypeInfo.get(tabAlias);
+            typeInfoList.add(en.getTypeInfo());
+          }
+        }
+      }
+
+      Map<String, List<ExprNodeDesc>> tableAliasToInStruct =
+          new HashMap<String, List<ExprNodeDesc>>();
+
+      for (Map.Entry<String, List<ExprNodeDesc>> entry : tableAliasToExprNodeDesc.entrySet()) {
+        String currTabAlias = entry.getKey();
+        List<ExprNodeDesc> currStructExprList = new ArrayList<ExprNodeDesc>();
+        currStructExprList.add(new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.getStructTypeInfo(tableAliasToPartName.get(currTabAlias),
+                tableAliasToTypeInfo.get(currTabAlias)),
+            FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+            entry.getValue()));
+        tableAliasToInStruct.put(currTabAlias, currStructExprList);
+      }
+
+      /** BEGIN FOR LOOP : Convert each row of the IN list to a list of structs. */
+      for (int i = 1; i < children.size(); i++) {
+        // The children must be either constant structs or generic struct UDFs
+        // containing constant values; otherwise we cannot do this optimization.
+        if (!(children.get(i) instanceof ExprNodeConstantDesc ||
+            (children.get(i) instanceof ExprNodeGenericFuncDesc &&
+            ((ExprNodeGenericFuncDesc) children.get(i)).
+            getGenericUDF() instanceof GenericUDFStruct))) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Partition columns not separated for " + fd +
+                ", child not a constant struct or generic struct UDF " +
+                children.get(i).getExprString());
+          }
+          return null;
+        }
+
+        Map<String, List<ExprNodeDesc>> tabAliasToConstPartColumns =
+            new HashMap<String, List<ExprNodeDesc>>();
+        List<ExprNodeDesc> constPartColumns = null;
+
+        // Case 1 : The struct is a constant struct, in which case the value should be
+        // a list of field values.
+        if (children.get(i) instanceof ExprNodeConstantDesc) {
+          // Sanity check. If the constant values do not match the metadata of the struct,
+          // return null.
+          if (!(((ExprNodeConstantDesc) (children.get(i))).getValue() instanceof List) ||
+              ((List) ((ExprNodeConstantDesc) (children.get(i))).
+              getValue()).size() != originalDescChildren.size()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Partition columns not separated for " + fd + ", " +
+                  children.get(i) + ", object value is : " +
+                  ((ExprNodeConstantDesc) (children.get(i))).getValue());
+            }
+            return null;
+          }
+          List<?> cnCols = (List<?>) (((ExprNodeConstantDesc)
+              (children.get(i))).getValue());
+
+          // For each field in the struct, add it to the constant partition columns
+          // of its table alias; non-partition fields are skipped.
+          for (int j = 0; j < originalDescChildren.size(); j++) {
+            ExprNodeDesc en = originalDescChildren.get(j);
+
+            if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+              String currTabAlias = en instanceof ExprNodeColumnDesc ?
+                  ((ExprNodeColumnDesc) en).getTabAlias() : en.getExprString();
+
+              if (currTabAlias == null) {
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug("Partition columns not separated for " + fd + ", " +
+                      children.get(i) + ", internal error for unknown table name for column : " +
+                      j);
+                }
+                return null;
+              }
+              if (!tabAliasToConstPartColumns.containsKey(currTabAlias)) {
+                constPartColumns = new ArrayList<ExprNodeDesc>();
+                constPartColumns.add(new ExprNodeConstantDesc(cnCols.get(j)));
+                tabAliasToConstPartColumns.put(currTabAlias, constPartColumns);
+              } else {
+                constPartColumns = tabAliasToConstPartColumns.get(currTabAlias);
+                constPartColumns.add(new ExprNodeConstantDesc(cnCols.get(j)));
+              }
+            }
+          }
+        } else {
+          List<ExprNodeDesc> cnChildren = ((ExprNodeGenericFuncDesc) children.get(i)).
+              getChildren();
+          // Case 2 : The struct is a generic struct UDF, in which case the number of children
+          // should match the number of struct fields and the children of the struct should be
+          // constant nodes.
+          if (cnChildren.size() != originalDescChildren.size()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Partition columns not separated for " + fd + ", " +
+                  " child struct size does not match with parent struct size " +
+                  cnChildren.size() + ", " + originalDescChildren.size());
+            }
+            return null;
+          }
+
+          // For each struct field of the current row, add it to the corresponding struct.
+          for (int j = 0; j < originalDescChildren.size(); j++) {
+            // The fields of the struct must be constant nodes; otherwise bail out.
+            if (!(cnChildren.get(j) instanceof ExprNodeConstantDesc)) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Partition columns not separated for " + fd + ", " +
+                    " struct field not a constant type for " + cnChildren.get(j).getExprString());
+              }
+              return null;
+            }
+            ExprNodeDesc en = originalDescChildren.get(j);
+
+            if (exprContainsOnlyPartitionColOrVirtualColOrConstants(en)) {
+              String currTabAlias = en instanceof ExprNodeColumnDesc ?
+                  ((ExprNodeColumnDesc) en).getTabAlias() : en.getExprString();
+              if (!tabAliasToConstPartColumns.containsKey(currTabAlias)) {
+                constPartColumns = new ArrayList<ExprNodeDesc>();
+                constPartColumns.add(cnChildren.get(j));
+                tabAliasToConstPartColumns.put(currTabAlias, constPartColumns);
+              } else {
+                constPartColumns = tabAliasToConstPartColumns.get(currTabAlias);
+                constPartColumns.add(cnChildren.get(j));
+              }
+            }
+          }
+        }
+
+        // Convert the current row into per-table structs.
+        // Consider T1 partitioned by A, B; T2 partitioned by C; T3 partitioned by D,
+        // i.e. : STRUCT(T1.A, T1.B, T2.C, T3.D) => {STRUCT(T1.A, T1.B), STRUCT(T2.C), STRUCT(T3.D)}
+        for (Map.Entry<String, List<ExprNodeDesc>> entry : tabAliasToConstPartColumns.entrySet()) {
+          String currTableName = entry.getKey();
+          List<ExprNodeDesc> currExprNodeList = tableAliasToInStruct.get(currTableName);
+
+          currExprNodeList.add(new ExprNodeGenericFuncDesc(
+              TypeInfoFactory.getStructTypeInfo(tableAliasToPartName.get(currTableName),
+                  tableAliasToTypeInfo.get(currTableName)),
+              FunctionRegistry.getFunctionInfo(STRUCT_UDF).getGenericUDF(),
+              tabAliasToConstPartColumns.get(entry.getKey())));
+        }
+      }
+      /** END FOR LOOP : Convert each row of the IN list to a list of structs. */
+
+      // Sanity check before we return, to avoid any errors.
+      if (tableAliasToInStruct.size() == 0) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition columns not separated for " + fd +
+              ", internal error for zero IN clauses");
+        }
+        return null;
+      }
+
+      final List<ExprNodeDesc> subExpr =
+          new ArrayList<ExprNodeDesc>(originalDescChildren.size() + 1);
+
+      for (Map.Entry<String, List<ExprNodeDesc>> entry : tableAliasToInStruct.entrySet()) {
+        subExpr.add(new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.booleanTypeInfo, FunctionRegistry.
+            getFunctionInfo(IN_UDF).getGenericUDF(), entry.getValue()));
+      }
+
+      // If there is only one table alias, return its IN clause containing only
+      // partition columns directly.
+      if (subExpr.size() == 1) {
+        return subExpr.get(0);
+      }
+      // Otherwise return the new expression containing only partition columns,
+      // with the per-table IN clauses combined by the AND operator.
+      return new ExprNodeGenericFuncDesc(
+          TypeInfoFactory.booleanTypeInfo,
+          FunctionRegistry.getFunctionInfo(AND_UDF).getGenericUDF(), subExpr);
+    }
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
index 7262164..075c962 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
@@ -55,7 +55,8 @@ protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
       TableScanOperator top) throws SemanticException, UDFArgumentException {
     OpWalkerCtx owc = (OpWalkerCtx) procCtx;
     // Otherwise this is not a sampling predicate and we need to
-    ExprNodeDesc predicate = fop.getConf().getOrigPredicate();
+    ExprNodeDesc predicate = fop.getConf().getPartitionpruningPredicate() != null ?
+        fop.getConf().getPartitionpruningPredicate() : fop.getConf().getOrigPredicate();
     predicate = predicate == null ? fop.getConf().getPredicate() : predicate;
     String alias = top.getConf().getAlias();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
index 6a31689..da79566 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FilterDesc.java
@@ -80,6 +80,7 @@ public String toString() {
   private static final long serialVersionUID = 1L;
   private org.apache.hadoop.hive.ql.plan.ExprNodeDesc predicate;
   private transient ExprNodeDesc origPredicate;
+  private transient ExprNodeDesc partitionpruningPredicate;
   private boolean isSamplingPred;
   private transient SampleDesc sampleDescr;
   //Is this a filter that should perform a comparison for sorted searches
@@ -159,6 +160,14 @@ public ExprNodeDesc getOrigPredicate() {
     return origPredicate;
   }
 
+  public void setPartitionpruningPredicate(ExprNodeDesc partprunePred) {
+    this.partitionpruningPredicate = partprunePred;
+  }
+
+  public ExprNodeDesc getPartitionpruningPredicate() {
+    return partitionpruningPredicate;
+  }
+
   /**
    * Some filters are generated or implied, which means it is not in the query.
    * It is added by the analyzer. For example, when we do an inner join, we add
diff --git a/ql/src/test/queries/clientpositive/pcs.q b/ql/src/test/queries/clientpositive/pcs.q
new file mode 100644
index 0000000..60e612f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/pcs.q
@@ -0,0 +1,27 @@
+drop table pcs_t1;
+drop table pcs_t2;
+
+create table pcs_t1 (key int, value string) partitioned by (ds string);
+insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key;
+insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key;
+
+set hive.optimize.point.lookup = true;
+
+explain extended select key, value, ds from pcs_t1 where ds<='2000-04-09' and key<5 order by key, ds;
+select key, value, ds from pcs_t1 where ds<='2000-04-09' and key<5 order by key, ds;
+
+set hive.optimize.point.lookup = false;
+set hive.optimize.ppd=true;
+
+explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+
+explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4));
+select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4));
+
+explain extended select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08'));
+select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08'));
+
+explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2));
+select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2));
diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out
new file mode 100644
index 0000000..d54e73c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/pcs.q.out
@@ -0,0 +1,1074 @@
+PREHOOK: query: drop table pcs_t1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table pcs_t1
+POSTHOOK: type:
DROPTABLE +PREHOOK: query: drop table pcs_t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcs_t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcs_t1 +POSTHOOK: query: create table pcs_t1 (key int, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcs_t1 +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: query: insert overwrite table pcs_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcs_t1@ds=2000-04-10 +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcs_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select key, value, ds from pcs_t1 where ds<='2000-04-09' and key<5 order by key, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcs_t1 where ds<='2000-04-09' and key<5 order by key, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + 
alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key < 5) (type: boolean) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcs_t1 where ds<='2000-04-09' and key<5 order by key, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcs_t1 where ds<='2000-04-09' and key<5 order by key, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +0 val_0 2000-04-08 +0 val_0 2000-04-08 +0 val_0 2000-04-08 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +2 val_2 2000-04-08 +2 val_2 2000-04-09 +4 val_4 2000-04-08 +4 val_4 2000-04-09 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,key)) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: 
QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + 2 + TOK_FUNCTION + struct + '2000-04-08' + 3 + TOK_FUNCTION + struct + '2000-04-09' + 4 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: 
COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + 2))) IN (const struct('2000-04-08',3), const struct('2000-04-09',4)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +2000-04-09 +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain extended select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + pcs_t1 + a + TOK_TABREF + TOK_TABNAME + pcs_t1 + b + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + a + ds + TOK_SELEXPR + . + TOK_TABLE_OR_COL + b + key + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + . + TOK_TABLE_OR_COL + a + ds + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + ds + TOK_FUNCTION + struct + '2000-04-08' + 1 + '2000-04-09' + TOK_FUNCTION + struct + '2000-04-09' + 2 + '2000-04-08' + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: int), _col1 (type: string) + auto parallelism: false + TableScan + alias: a + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), ds (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col0 (type: int), _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [$hdt$_0:a, $hdt$_1:a] + /pcs_t1/ds=2000-04-09 [$hdt$_0:a, $hdt$_1:a] + /pcs_t1/ds=2000-04-10 [$hdt$_0:a, $hdt$_1:a] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 66 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (struct(_col1,_col0,_col3)) IN (const struct('2000-04-08',1,'2000-04-09'), const struct('2000-04-09',2,'2000-04-08')) (type: boolean) + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 33 Data size: 264 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns 
_col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select a.ds, b.key from pcs_t1 a, pcs_t1 b where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +#### A masked pattern was here #### +2000-04-09 9 +2000-04-09 8 +2000-04-09 5 +2000-04-09 5 +2000-04-09 5 +2000-04-09 4 +2000-04-09 2 +2000-04-09 19 +2000-04-09 18 +2000-04-09 18 +2000-04-09 17 +2000-04-09 15 +2000-04-09 15 +2000-04-09 12 +2000-04-09 12 +2000-04-09 11 +2000-04-09 10 +2000-04-09 0 +2000-04-09 0 +2000-04-09 0 +PREHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcs_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + TOK_FUNCTION + in + TOK_FUNCTION + struct + TOK_TABLE_OR_COL + ds + + + TOK_TABLE_OR_COL + key + TOK_TABLE_OR_COL + key + TOK_FUNCTION + struct + '2000-04-08' + 1 + TOK_FUNCTION + struct + '2000-04-09' + 2 + + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types 
string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcs_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcs_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcs_t1 + name: default.pcs_t1 + Processor Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(ds,(key + key))) IN (const struct('2000-04-08',1), const struct('2000-04-09',2)) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcs_t1 +PREHOOK: Input: default@pcs_t1@ds=2000-04-08 +PREHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select ds from pcs_t1 where struct(ds, key+key) in (struct('2000-04-08',1), struct('2000-04-09',2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcs_t1 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 +#### A masked pattern was here ####