diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 32ab3d8..2d74387 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -635,6 +635,14 @@ HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0), HIVE_STATS_KEY_PREFIX_MAX_LENGTH("hive.stats.key.prefix.max.length", 150), HIVE_STATS_KEY_PREFIX("hive.stats.key.prefix", ""), // internal usage only + // if length of variable length data type cannot be determined this length will be used. + HIVE_STATS_MAX_VARIABLE_LENGTH("hive.stats.max.variable.length", 100), + // if number of elements in list cannot be determined, this value will be used + HIVE_STATS_LIST_NUM_ENTRIES("hive.stats.list.num.entries", 10), + // if number of elements in map cannot be determined, this value will be used + HIVE_STATS_MAP_NUM_ENTRIES("hive.stats.map.num.entries", 10), + // to accurately compute statistics for GROUPBY map side parallelism needs to be known + HIVE_STATS_MAP_SIDE_PARALLELISM("hive.stats.map.parallelism", 1), // Concurrency HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false), diff --git data/files/alltypes.txt data/files/alltypes.txt new file mode 100644 index 0000000..d86a7bc --- /dev/null +++ data/files/alltypes.txt @@ -0,0 +1,2 @@ +true|10|100|1000|10000|4.0|20.0|2.2222|1969-12-31 15:59:58.174|1970-01-01 00:00:00|hello|k1:v1,k2:v2|100,200|{10, "foo"} +true|20|200|2000|20000|8.0|40.0|4.2222|1970-12-31 15:59:58.174|1971-01-01 00:00:00||k3:v3,k4:v4|200,300|{20, "bar"} diff --git data/files/dept.txt data/files/dept.txt new file mode 100644 index 0000000..292bee6 --- /dev/null +++ data/files/dept.txt @@ -0,0 +1,4 @@ +31|sales +33|engineering +34|clerical +35|marketing diff --git data/files/emp.txt data/files/emp.txt new file mode 100644 index 0000000..a0e76b9 --- /dev/null +++ data/files/emp.txt @@ -0,0 +1,6 @@ +Rafferty|31 +Jones|33 +Steinberg|33 +Robinson|34 +Smith|34 +John| diff --git data/files/loc.txt data/files/loc.txt new file mode 100644 index 0000000..69910b7 --- /dev/null +++ data/files/loc.txt @@ -0,0 +1,8 @@ +OH|31|43201|2001 +IO|32|43202|2001 +CA|35|43809|2001 +FL|33|54342|2001 +UT|35||2001 +CA|35|43809|2001 +|34|40000| +FL|33|54342|2001 diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index bad4f48..27117cd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -408,6 +408,8 @@ DROP_COMMAND_NOT_ALLOWED_FOR_PARTITION(30011, "Partition protected from being dropped"), COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not supported " + "for partition columns"), + + STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"), ; private int errorCode; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index 3b9a653..f74580d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -1594,4 +1595,12 @@ static boolean toString(StringBuilder builder, Set visited, Operator } return false; } + + public Statistics getStatistics() { + return getConf().getStatistics(); + } + + public void setStatistics(Statistics stats) { + getConf().setStatistics(stats); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 0703c86..f329ae8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -114,6 +115,9 @@ public void initialize(HiveConf hiveConf) { if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) { transformations.add(new StatsOptimizer()); } + if (pctx.getContext().getExplain()) { + transformations.add(new AnnotateWithStatistics()); + } transformations.add(new SimpleFetchOptimizer()); // must be called last if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java new file mode 100644 index 0000000..181c12d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.plan.Statistics; + +public class AnnotateStatsProcCtx implements NodeProcessorCtx { + + private ParseContext pctx; + private HiveConf conf; + private Statistics andExprStats = null; + + public AnnotateStatsProcCtx(ParseContext pctx) { + this.setParseContext(pctx); + if(pctx != null) { + this.setConf(pctx.getConf()); + } else { + this.setConf(null); + } + } + + public HiveConf getConf() { + return conf; + } + + public void setConf(HiveConf conf) { + this.conf = conf; + } + + public ParseContext getParseContext() { + return pctx; + } + + public void setParseContext(ParseContext pctx) { + this.pctx = pctx; + } + + public Statistics getAndExprStats() { + return andExprStats; + } + + public void setAndExprStats(Statistics andExprStats) { + this.andExprStats = andExprStats; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java new file mode 100644 index 0000000..aac447a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.DemuxOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.PreOrderWalker; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.Transform; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class AnnotateWithStatistics implements Transform { + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + AnnotateStatsProcCtx aspCtx = new AnnotateStatsProcCtx(pctx); + + // create a walker which walks the tree in a DFS manner while maintaining the + // operator stack. The dispatcher generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getTableScanRule()); + opRules.put(new RuleRegExp("SEL", SelectOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getSelectRule()); + opRules.put(new RuleRegExp("FIL", FilterOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getFilterRule()); + opRules.put(new RuleRegExp("GBY", GroupByOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getGroupByRule()); + opRules.put(new RuleRegExp("JOIN", CommonJoinOperator.getOperatorName() + "%|" + + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule()); + opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getLimitRule()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(StatsRulesProcFactory.getDefaultRule(), opRules, + aspCtx); + GraphWalker ogw = new PreOrderWalker(disp); + + // Create a list of topop nodes + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pctx; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java new file mode 100644 index 0000000..0bd8694 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -0,0 +1,1001 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.DemuxOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde.serdeConstants; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +public class StatsRulesProcFactory { + + /** + * Collect basic statistics like number of rows, data size 
and column level + * statistics from the table. Also sets the state of the available statistics. + * Basic and column statistics can have one of the following states: + * COMPLETE, PARTIAL, NONE. In the case of a partitioned table, the basic and column + * stats are aggregated together to table-level statistics. + * + */ + public static class TableScanStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + TableScanOperator tsop = (TableScanOperator) nd; + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + PrunedPartitionList partList = null; + try { + partList = aspCtx.getParseContext().getPrunedPartitions(tsop.getName(), tsop); + } catch (HiveException e1) { + throw new SemanticException(e1); + } + Table table = aspCtx.getParseContext().getTopToTable().get(tsop); + + // gather statistics for the first time and then attach them to the table scan operator + Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop); + try { + tsop.setStatistics(stats.clone()); + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + } + + /** + * SELECT operator doesn't change the number of rows emitted from the parent + * operator. It changes the size of each tuple emitted. In a typical case, + * where only a subset of columns is selected, the average row size will + * shrink as some of the columns are pruned. In order to accurately + * compute the average row size, column level statistics are required. + * Column level statistics store the average size of values in a column, which + * can be used to more reliably estimate the reduction in size of each + * tuple. In the absence of column level statistics, column sizes will be + * based on data types. For primitive data types, sizes from + * {@link org.apache.hadoop.hive.ql.util.JavaDataModel} will be + * used, and for variable length data types the worst case will be assumed. + * + *
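+ * A worked example of the arithmetic, with hypothetical numbers rather than figures
+ * taken from this patch: if the parent emits 1,000 rows with an average row size of
+ * 100 bytes, and the SELECT keeps only two columns whose average sizes are 8 and 16
+ * bytes, the annotated statistics become numRows = 1,000 and
+ * dataSize = 1,000 * (8 + 16) = 24,000 bytes; the row count itself is unchanged.
+ *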

+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in + * "Database Systems: The Complete Book" by Garcia-Molina et al. + *

+ * + */ + public static class SelectStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + SelectOperator sop = (SelectOperator) nd; + Operator parent = sop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + + // SELECT (*) does not change the statistics. Just pass on the parent statistics + if (sop.getConf().isSelectStar()) { + try { + if (parentStats != null) { + sop.setStatistics(parentStats.clone()); + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + try { + if (satisfyPrecondition(parentStats)) { + Statistics stats = parentStats.clone(); + List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + sop.getColumnExprMap(), sop.getSchema()); + long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats); + stats.setColumnStats(colStats); + stats.setDataSize(dataSize); + sop.setStatistics(stats); + } else { + if (parentStats != null) { + sop.setStatistics(parentStats.clone()); + } + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + } + + /** + * FILTER operator does not change the average row size but it does change + * the number of rows emitted. The reduction in the number of rows emitted + * is dependent on the filter expression. + * + *
+ * Notations:
+ * - T(S): number of tuples in relation S
+ * - V(S,A): number of distinct values of attribute A in relation S
+ *
+ * Rules:
+ * 1. Column equals a constant: T(S) = T(R) / V(R,A)
+ *
+ * 2. Inequality condition: T(S) = T(R) / 3
+ *
+ * 3. Not-equals comparison: simple formula T(S) = T(R);
+ * alternate formula T(S) = T(R) * (V(R,A) - 1) / V(R,A)
+ *
+ * 4. NOT condition: T(S) = T(R) - T(S'), where T(S') is the number of tuples
+ * that satisfy the condition being negated
+ *
+ * 5. Multiple AND conditions: cascadingly apply rules 1 to 3 (order doesn't matter)
+ *
+ * 6. Multiple OR conditions: the simple formula is to evaluate the conditions
+ * independently and sum the results, T(S) = m1 + m2;
+ * the alternate formula is T(S) = T(R) * (1 - (1 - m1/T(R)) * (1 - m2/T(R))),
+ * where m1 is the number of tuples that satisfy condition1 and m2 is the number of
+ * tuples that satisfy condition2
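+ *
+ * A worked example under assumed statistics (hypothetical numbers, not taken from
+ * this patch): for relation R with T(R) = 1200 and V(R,A) = 60, the predicate
+ * "A = 10" passes T(S) = 1200 / 60 = 20 rows by rule 1, the predicate "A < 10"
+ * passes 1200 / 3 = 400 rows by rule 2, and the conjunction "A = 10 AND A < 10"
+ * cascades the two rules: 1200 / 60 = 20, then 20 / 3 = 6 rows (rule 5).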

+ *

+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in + * "Database Systems: The Complete Book" by Garcia-Molina et al. + *

+ * + */ + public static class FilterStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + FilterOperator fop = (FilterOperator) nd; + Operator parent = fop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + + try { + if (satisfyPrecondition(parentStats)) { + ExprNodeDesc pred = fop.getConf().getPredicate(); + + // evaluate filter expression and update statistics + long newNumRows = evaluateExpression(parentStats, pred, aspCtx); + Statistics st = parentStats.clone(); + updateStats(st, newNumRows); + fop.setStatistics(st); + } else { + if (parentStats != null) { + fop.setStatistics(parentStats.clone()); + } + } + + aspCtx.setAndExprStats(null); + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + private long evaluateExpression(Statistics stats, ExprNodeDesc pred, + AnnotateStatsProcCtx aspCtx) throws CloneNotSupportedException { + long newNumRows = 0; + Statistics andStats = null; + if (pred instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + GenericUDF udf = genFunc.getGenericUDF(); + + // for AND condition cascadingly update stats + if (udf instanceof GenericUDFOPAnd) { + andStats = stats.clone(); + aspCtx.setAndExprStats(andStats); + + // evaluate children + for (ExprNodeDesc child : genFunc.getChildren()) { + newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx); + updateStats(aspCtx.getAndExprStats(), newNumRows); + } + } else { + + // for OR condition independently compute and update stats + if (udf instanceof GenericUDFOPOr) { + for (ExprNodeDesc child : genFunc.getChildren()) { + newNumRows += evaluateChildExpr(stats, child, aspCtx); + } + } else if (udf instanceof GenericUDFOPNot) { + newNumRows = evaluateNotExpr(stats, pred, aspCtx); + } else if (udf instanceof GenericUDFOPNotNull) { + newNumRows = evaluateColEqualsNullExpr(stats, pred, aspCtx); + newNumRows = stats.getNumRows() - newNumRows; + } else if (udf instanceof GenericUDFOPNull) { + newNumRows = evaluateColEqualsNullExpr(stats, pred, aspCtx); + } else { + + // single predicate condition + newNumRows = evaluateChildExpr(stats, pred, aspCtx); + } + } + } else if (pred instanceof ExprNodeColumnDesc) { + + // can be boolean column in which case return true count + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred; + String colName = encd.getColumn(); + String tabAlias = encd.getTabAlias(); + String colType = encd.getTypeString(); + if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + return cs.getNumTrues(); + } else { + + // if not boolean column return half the number of rows + return stats.getNumRows() / 2; + } + } + + return newNumRows; + } + + private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx) + throws CloneNotSupportedException { + + long numRows = stats.getNumRows(); + + // if the evaluate yields true then pass all rows else pass 0 rows + if (pred instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + for (ExprNodeDesc leaf : genFunc.getChildren()) { + if (leaf instanceof ExprNodeGenericFuncDesc) { + + // GenericUDF + long 
newNumRows = 0; + for (ExprNodeDesc child : ((ExprNodeGenericFuncDesc) pred).getChildren()) { + newNumRows = evaluateChildExpr(stats, child, aspCtx); + } + return numRows - newNumRows; + } else if (leaf instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) leaf; + if (encd.getValue().equals(true)) { + return 0; + } else { + return numRows; + } + } else if (leaf instanceof ExprNodeColumnDesc) { + + // NOT on boolean columns is possible. in which case return false count. + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf; + String colName = encd.getColumn(); + String tabAlias = encd.getTabAlias(); + String colType = encd.getTypeString(); + if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + return cs.getNumFalses(); + } else { + + // if not boolean column return half the number of rows + return numRows / 2; + } + } + } + } + + // worst case + return numRows; + } + + private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred, + AnnotateStatsProcCtx aspCtx) { + + long numRows = stats.getNumRows(); + + // evaluate similar to "col = constant" expr + if (pred instanceof ExprNodeGenericFuncDesc) { + + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + for (ExprNodeDesc leaf : genFunc.getChildren()) { + + if (leaf instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; + String colName = colDesc.getColumn(); + String tabAlias = colDesc.getTabAlias(); + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + if (cs != null) { + long dvs = cs.getCountDistint(); + // if NULLs exists, add 1 to distinct count + if (cs.getNumNulls() > 0) { + dvs += 1; + } + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } + } + } + + // worst case + return numRows; + } + + private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx) + throws CloneNotSupportedException { + + long numRows = stats.getNumRows(); + + if (child instanceof ExprNodeGenericFuncDesc) { + + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) child; + GenericUDF udf = genFunc.getGenericUDF(); + + if (udf instanceof GenericUDFOPEqual || udf instanceof GenericUDFOPEqualNS) { + String colName = null; + String tabAlias = null; + boolean isConst = false; + + for (ExprNodeDesc leaf : genFunc.getChildren()) { + if (leaf instanceof ExprNodeConstantDesc) { + + // if the first argument is const then just set the flag and continue + if (colName == null) { + isConst = true; + continue; + } + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + if (cs != null) { + long dvs = cs.getCountDistint(); + // if NULLs exists, add 1 to distinct count + if (cs.getNumNulls() > 0) { + dvs += 1; + } + + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } else if (leaf instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; + colName = colDesc.getColumn(); + tabAlias = colDesc.getTabAlias(); + + // if const is first argument then evaluate the result + if (isConst) { + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + if (cs != null) { + long dvs = cs.getCountDistint(); + // if NULLs exists, add 1 to distinct count + if (cs.getNumNulls() > 0) { + dvs += 1; + } + + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } + } + } + } else if (udf 
instanceof GenericUDFOPNotEqual) { + return numRows; + } else if (udf instanceof GenericUDFOPEqualOrGreaterThan || + udf instanceof GenericUDFOPEqualOrLessThan || + udf instanceof GenericUDFOPGreaterThan || + udf instanceof GenericUDFOPLessThan) { + return numRows / 3; + } else { + return evaluateExpression(stats, genFunc, aspCtx); + } + } + + // worst case + return numRows; + } + + } + + /** + * GROUPBY operator changes the number of rows. The number of rows emitted + * by the GBY operator will be at least 1 and at most T(R) (the number of rows in relation R), + * depending on the aggregation. A better estimate can be found if we have column statistics + * on the columns that we are grouping on. + *

+ * Suppose we are grouping by attributes A,B,C and statistics for columns A,B,C are + * available; then a better estimate can be found by taking the smaller of V(R,[A,B,C]) + * (the product of the distinct cardinalities of A,B,C) and T(R)/2. + *

+ * T(R) = min(T(R)/2, V(R,[A,B,C])) ---> [1] + * + *
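+ *
+ * For example (hypothetical numbers): with T(R) = 1000, V(R,A) = 10, V(R,B) = 5 and
+ * V(R,C) = 4, V(R,[A,B,C]) = 10 * 5 * 4 = 200, so the estimate is
+ * min(1000/2, 200) = 200 rows.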

+ * In the presence of grouping sets, map-side GBY will emit more rows depending on the size of the + * grouping set (input rows * size of grouping set). These rows will then get reduced by + * map-side hash aggregation. Hash aggregation is an optimization in Hive to reduce the number of + * rows shuffled between the map and reduce stages. This optimization is disabled if the memory + * used for hash aggregation exceeds 90% of the maximum memory available for hash aggregation. The number + * of rows emitted from the map side will therefore vary depending on whether hash aggregation stays enabled + * throughout execution or gets disabled. In the presence of grouping sets, the following rules are applied + *

+ * If hash-aggregation is enabled, for the query SELECT * FROM table GROUP BY (A,B) WITH CUBE + *

+ * T(R) = min(T(R)/2, T(R, GBY(A,B)) + T(R, GBY(A)) + T(R, GBY(B)) + 1) + *

+ * where GBY(A,B), GBY(A), GBY(B) are estimated with the GBY rule [1] mentioned above + * + *
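+ *
+ * As a hypothetical illustration: with T(R) = 1000, V(R,[A,B]) = 200, V(R,A) = 10 and
+ * V(R,B) = 20, rule [1] gives T(R, GBY(A,B)) = min(500, 200) = 200,
+ * T(R, GBY(A)) = min(500, 10) = 10 and T(R, GBY(B)) = min(500, 20) = 20, so the
+ * estimate is min(500, 200 + 10 + 20 + 1) = 231 rows.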

+ * If hash-aggregation is disabled, apply the GBY rule [1] and then multiply the result by the + * number of elements in the grouping set: T(R) = T(R) * length_of_grouping_set. Since we do not know + * at compile time whether hash-aggregation will be enabled or disabled, we assume the worst case, i.e., + * hash-aggregation is disabled + * + *
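+ *
+ * For example (hypothetical numbers): GROUP BY (A,B) WITH CUBE expands to 4 grouping
+ * sets, so 1000 input rows are estimated as 1000 * 4 = 4000 rows emitted from the
+ * map side under this worst-case assumption.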

+ * NOTE: The number of rows from the map-side GBY operator depends on map-side parallelism, i.e., the + * number of mappers. The map-side parallelism is read from the Hive config + * "hive.stats.map.parallelism". If the config is not set, a default parallelism of 1 is + * assumed. + * + *
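+ *
+ * For example, under a hypothetical setting of hive.stats.map.parallelism=4, a
+ * map-side GBY over 1000 parent rows with no grouping sets would be annotated with
+ * 4 * 1000 = 4000 rows.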

+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in + * "Database Systems: The Complete Book" by Garcia-Molina et al. + *

+ * + */ + public static class GroupByStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + GroupByOperator gop = (GroupByOperator) nd; + Operator parent = gop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + int mapSideParallelism = HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_STATS_MAP_SIDE_PARALLELISM); + + try { + if (satisfyPrecondition(parentStats)) { + Statistics stats = parentStats.clone(); + RowSchema rs = gop.getSchema(); + List aggDesc = gop.getConf().getAggregators(); + Map colExprMap = gop.getColumnExprMap(); + List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + colExprMap, rs); + stats.setColumnStats(colStats); + long dvProd = 1; + long newNumRows = 0; + + // compute product of distinct values of grouping columns + for (ColStatistics cs : colStats) { + if (cs != null) { + long dv = cs.getCountDistint(); + if (cs.getNumNulls() > 0) { + dv += 1; + } + dvProd *= dv; + } + } + + // map side + if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) { + + // since we do not know if hash-aggregation will be enabled or disabled + // at runtime we will assume that map-side group by does not do any reduction. + // hence no group by rule will be applied + + // map-side grouping set present. if grouping set is present then + // multiply the number of rows by number of elements in grouping set + if (gop.getConf().isGroupingSetsPresent()) { + int multiplier = gop.getConf().getListGroupingSets().size(); + + // take into account the map-side parallelism as well, default is 1 + multiplier *= mapSideParallelism; + newNumRows = multiplier * stats.getNumRows(); + long dataSize = multiplier * stats.getDataSize(); + stats.setNumRows(newNumRows); + stats.setDataSize(dataSize); + for (ColStatistics cs : colStats) { + if (cs != null) { + long oldNumNulls = cs.getNumNulls(); + long newNumNulls = multiplier * oldNumNulls; + cs.setNumNulls(newNumNulls); + } + } + } else { + + // map side no grouping set + newNumRows = stats.getNumRows() * mapSideParallelism; + updateStats(stats, newNumRows); + } + } else { + + // reduce side + newNumRows = applyGBYRule(stats.getNumRows(), dvProd); + updateStats(stats, newNumRows); + } + + // if UDAFs are present, new columns needs to be added + if (!aggDesc.isEmpty()) { + List aggColStats = Lists.newArrayList(); + for (ColumnInfo ci : rs.getSignature()) { + + // if the columns in row schema is not contained in column + // expression map, then those are the aggregate columns that + // are added GBY operator. 
We will estimate the column statistics + // for those newly added columns + if (!colExprMap.containsKey(ci.getInternalName())) { + String colName = ci.getInternalName(); + colName = StatsUtils.stripPrefixFromColumnName(colName); + String tabAlias = ci.getTabAlias(); + String colType = ci.getTypeName(); + ColStatistics cs = new ColStatistics(tabAlias, colName, colType); + cs.setCountDistint(stats.getNumRows()); + cs.setNumNulls(0); + cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType)); + aggColStats.add(cs); + } + } + stats.addToColumnStats(aggColStats); + + // if a UDAF is present and the column expression map is empty then it must + // be a full aggregation query like count(*), in which case the number of rows will be 1 + if (colExprMap.isEmpty()) { + stats.setNumRows(1); + updateStats(stats, 1); + } + } + + gop.setStatistics(stats); + } else { + if (parentStats != null) { + gop.setStatistics(parentStats.clone()); + } + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + private long applyGBYRule(long numRows, long dvProd) { + long newNumRows = numRows; + + // to avoid the divide by 2 becoming 0 + if (numRows > 1) { + if (dvProd != 0) { + newNumRows = Math.min(numRows / 2, dvProd); + } else { + newNumRows = numRows / 2; + } + } + return newNumRows; + } + } + + /** + * JOIN operator can yield any of the following three cases:
+ * 1. The values of the join keys are disjoint in the two relations, in which case
+ * T(RXS) = 0 (we need histograms for this)
+ * 2. The join key is a primary key on relation R and a foreign key on relation S, in
+ * which case every tuple in S will have a matching tuple in R: T(RXS) = T(S)
+ * (we need histograms for this)
+ * 3. Both relations R and S have the same value for the join key, e.g., a boolean
+ * column with all true values: T(RXS) = T(R) * T(S) (we need histograms for this;
+ * countDistinct = 1 and the same value)
+ * + *

    + * In the absence of histograms, we can use the following general case + *

    + * Single attribute + *

    + * T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute + *
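+ *
+ * Worked example (hypothetical numbers): with T(R) = 1000, T(S) = 500, V(R,Y) = 50
+ * and V(S,Y) = 25, T(RXS) = (1000 * 500) / max(50, 25) = 10,000 rows.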

    + * Multiple attributes + *

+ * T(RXS) = T(R)*T(S) / (max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2))), where y1 and y2 are the join + * attributes + * + *
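+ *
+ * Worked example (hypothetical numbers): with T(R) = 1000, T(S) = 500,
+ * max(V(R,y1), V(S,y1)) = 50 and max(V(R,y2), V(S,y2)) = 20, the denominator is
+ * 50 * 20 = 1000, so T(RXS) = (1000 * 500) / 1000 = 500 rows.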

+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in + * "Database Systems: The Complete Book" by Garcia-Molina et al. + *

    + */ + public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + CommonJoinOperator jop = (CommonJoinOperator) nd; + List> parents = jop.getParentOperators(); + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + boolean allStatsAvail = true; + boolean allSatisfyPreCondition = true; + + for (Operator op : parents) { + if (op.getStatistics() == null) { + allStatsAvail = false; + } + } + + if (allStatsAvail) { + + for (Operator op : parents) { + if (!satisfyPrecondition(op.getStatistics())) { + allSatisfyPreCondition = false; + } + } + + if (allSatisfyPreCondition) { + // statistics object that is combination of statistics from all relations involved in JOIN + Statistics stats = new Statistics(); + long prodRows = 1; + List distinctVals = Lists.newArrayList(); + boolean multiAttr = false; + + + Map joinedColStats = Maps.newHashMap(); + Map> joinKeys = Maps.newHashMap(); + + // get the join keys from parent ReduceSink operators + for (int pos = 0; pos < parents.size(); pos++) { + ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); + + Statistics parentStats = parent.getStatistics(); + prodRows *= parentStats.getNumRows(); + List keyExprs = parent.getConf().getKeyCols(); + + // multi-attribute join key + if (keyExprs.size() > 1) { + multiAttr = true; + } + + // compute fully qualified join key column names. this name will be used to + // quickly look-up for column statistics of join key. + // TODO: expressions in join condition will be ignored. assign internal name + // for expressions and estimate column statistics for expression. + List fqCols = StatsUtils.getFullQualifedColNameFromExprs(keyExprs, + parent.getColumnExprMap()); + joinKeys.put(pos, fqCols); + + Map colExprMap = parent.getColumnExprMap(); + RowSchema rs = parent.getSchema(); + + // get column statistics for all output columns + List cs = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + colExprMap, rs); + for (ColStatistics c : cs) { + if (c != null) { + joinedColStats.put(c.getFullyQualifiedColName(), c); + } + } + + // since new statistics is derived from all relations involved in JOIN, + // we need to update the state information accordingly + stats.updateBasicStatsState(parentStats.getBasicStatsState()); + stats.updateColumnStatsState(parentStats.getColumnStatsState()); + } + + // compute denominator i.e, max(V(R,Y), V(S,Y)) in case of single attribute join. 
+ // else max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)) in case of multi-attribute join + long denom = 1; + if (multiAttr) { + List perAttrDVs = Lists.newArrayList(); + int numAttr = joinKeys.get(0).size(); + for (int idx = 0; idx < numAttr; idx++) { + for (Integer i : joinKeys.keySet()) { + String col = joinKeys.get(i).get(idx); + ColStatistics cs = joinedColStats.get(col); + if (cs != null) { + perAttrDVs.add(cs.getCountDistint()); + } + } + distinctVals.add(getDenominator(perAttrDVs)); + perAttrDVs.clear(); + } + + for (Long l : distinctVals) { + denom *= l; + } + } else { + for (List jkeys : joinKeys.values()) { + for (String jk : jkeys) { + ColStatistics cs = joinedColStats.get(jk); + if (cs != null) { + distinctVals.add(cs.getCountDistint()); + } + } + } + denom = getDenominator(distinctVals); + } + + // column statistics from different sources are put together and rename + // fully qualified column names based on output schema of join operator + Map colExprMap = jop.getColumnExprMap(); + RowSchema rs = jop.getSchema(); + List outColStats = Lists.newArrayList(); + for (ColumnInfo ci : rs.getSignature()) { + String key = ci.getInternalName(); + ExprNodeDesc end = colExprMap.get(key); + if (end instanceof ExprNodeColumnDesc) { + String colName = ((ExprNodeColumnDesc) end).getColumn(); + colName = StatsUtils.stripPrefixFromColumnName(colName); + String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias(); + String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); + ColStatistics cs = joinedColStats.get(fqColName); + String outColName = key; + String outTabAlias = ci.getTabAlias(); + outColName = StatsUtils.stripPrefixFromColumnName(outColName); + if (cs != null) { + cs.setColumnName(outColName); + cs.setTableAlias(outTabAlias); + } + outColStats.add(cs); + } + } + + // update join statistics + stats.setColumnStats(outColStats); + long newRowCount = prodRows / denom; + stats.setNumRows(newRowCount); + stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats)); + jop.setStatistics(stats); + } + } + return null; + } + + private long getDenominator(List distinctVals) { + + // simple join from 2 relations + // denom = max(v1, v2) + if (distinctVals.size() <= 2) { + return Collections.max(distinctVals); + } else { + + // join from multiple relations + // denom = max(v1, v2) * max(v2, v3) * max(v3, v4) + long denom = 1; + for (int i = 0; i < distinctVals.size() - 1; i++) { + long v1 = distinctVals.get(i); + long v2 = distinctVals.get(i + 1); + if (v1 >= v2) { + denom *= v1; + } else { + denom *= v2; + } + } + return denom; + } + } + + } + + /** + * LIMIT operator changes the number of rows and thereby the data size. + * + */ + public static class LimitStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + LimitOperator lop = (LimitOperator) nd; + Operator parent = lop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + + try { + if (satisfyPrecondition(parentStats)) { + Statistics stats = parentStats.clone(); + long limit = -1; + limit = lop.getConf().getLimit(); + if (limit == -1) { + limit = lop.getConf().getLeastRows(); + } + + // if the limit is greater than the available rows then do not update statistics + if (limit <= parentStats.getNumRows()) { + updateStats(stats, limit); + } + lop.setStatistics(stats); + } else { + if (parentStats != null) { + lop.setStatistics(parentStats.clone()); + } + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + } + + /** + * The default rule is to aggregate the statistics from all parent operators. + * + */ + public static class DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + Operator op = (Operator) nd; + Statistics stats = op.getConf().getStatistics(); + if (stats == null) { + if (op.getParentOperators() != null) { + + // if a parent's statistics are null then that branch of the tree has not been walked yet. + // don't update the stats until all branches are walked + if (isAllParentsContainStatistics(op)) { + stats = new Statistics(); + for (Operator parent : op.getParentOperators()) { + if (parent.getStatistics() != null) { + Statistics parentStats = parent.getStatistics(); + stats.addToNumRows(parentStats.getNumRows()); + stats.addToDataSize(parentStats.getDataSize()); + stats.updateBasicStatsState(parentStats.getBasicStatsState()); + stats.updateColumnStatsState(parentStats.getColumnStatsState()); + stats.addToColumnStats(parentStats.getColumnStats()); + op.getConf().setStatistics(stats); + } + } + } + } + } + + return null; + } + + // check if all parent statistics are available + private boolean isAllParentsContainStatistics(Operator op) { + for (Operator parent : op.getParentOperators()) { + if (parent.getStatistics() == null) { + return false; + } + } + return true; + } + + } + + public static NodeProcessor getTableScanRule() { + return new TableScanStatsRule(); + } + + public static NodeProcessor getSelectRule() { + return new SelectStatsRule(); + } + + public static NodeProcessor getFilterRule() { + return new FilterStatsRule(); + } + + public static NodeProcessor getGroupByRule() { + return new GroupByStatsRule(); + } + + public static NodeProcessor getJoinRule() { + return new JoinStatsRule(); + } + + public static NodeProcessor getLimitRule() { + return new LimitStatsRule(); + } + + public static NodeProcessor getDefaultRule() { + return new DefaultStatsRule(); + } + + /** + * Update the basic statistics of the statistics object based on the new row count + * + * @param stats + * - statistics to be updated + * @param newNumRows + * - new number of rows + */ + static void updateStats(Statistics stats, long newNumRows) { + long oldRowCount = stats.getNumRows(); + double ratio = (double) newNumRows / (double) oldRowCount; + stats.setNumRows(newNumRows); + + List colStats = stats.getColumnStats(); + for (ColStatistics cs : colStats) { + long oldNumNulls = cs.getNumNulls(); + long oldDV = cs.getCountDistint(); + long newNumNulls = Math.round(ratio * oldNumNulls); + long newDV = oldDV; + + // if ratio is greater than 1, then number of rows increases. 
This can happen + // when some operators like GROUPBY duplicate the input rows, in which case + // the number of distinct values should not change. Update the distinct count only when + // the output number of rows is less than the input number of rows. + if (ratio <= 1.0) { + newDV = Math.round(ratio * oldDV); + } + cs.setNumNulls(newNumNulls); + cs.setCountDistint(newDV); + } + stats.setColumnStats(colStats); + long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats); + stats.setDataSize(newDataSize); + } + + static boolean satisfyPrecondition(Statistics stats) { + return stats != null && stats.getBasicStatsState().equals(Statistics.State.COMPLETE) + && !stats.getColumnStatsState().equals(Statistics.State.NONE); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index c096a65..24694ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -21,6 +21,18 @@ public class AbstractOperatorDesc implements OperatorDesc { private boolean vectorMode = false; + protected transient Statistics statistics; + + @Override + @Explain(displayName = "Statistics", normalExplain = false) + public Statistics getStatistics() { + return statistics; + } + + @Override + public void setStatistics(Statistics statistics) { + this.statistics = statistics; + } @Override public Object clone() throws CloneNotSupportedException { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java new file mode 100644 index 0000000..0749dc0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.stats.StatsUtils; + + +public class ColStatistics { + + private String tabAlias; + private String colName; + private String colType; + private String fqColName; + private long countDistint; + private long numNulls; + private double avgColLen; + private long numTrues; + private long numFalses; + + public ColStatistics(String tabAlias, String colName, String colType) { + this.setTableAlias(tabAlias); + this.setColumnName(colName); + this.setColumnType(colType); + this.setFullyQualifiedColName(StatsUtils.getFullyQualifiedColumnName(tabAlias, colName)); + } + + public ColStatistics() { + this(null, null, null); + } + + public String getColumnName() { + return colName; + } + + public void setColumnName(String colName) { + this.colName = colName; + this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); + } + + public String getColumnType() { + return colType; + } + + public void setColumnType(String colType) { + this.colType = colType; + } + + public long getCountDistint() { + return countDistint; + } + + public void setCountDistint(long countDistint) { + this.countDistint = countDistint; + } + + public long getNumNulls() { + return numNulls; + } + + public void setNumNulls(long numNulls) { + this.numNulls = numNulls; + } + + public double getAvgColLen() { + return avgColLen; + } + + public void setAvgColLen(double avgColLen) { + this.avgColLen = avgColLen; + } + + public String getFullyQualifiedColName() { + return fqColName; + } + + public void setFullyQualifiedColName(String fqColName) { + this.fqColName = fqColName; + } + + public String getTableAlias() { + return tabAlias; + } + + public void setTableAlias(String tabName) { + this.tabAlias = tabName; + this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabName, colName); + } + + public long getNumTrues() { + return numTrues; + } + + public void setNumTrues(long numTrues) { + this.numTrues = numTrues; + } + + public long getNumFalses() { + return numFalses; + } + + public void setNumFalses(long numFalses) { + this.numFalses = numFalses; + } + + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" fqColName: "); + sb.append(fqColName); + sb.append(" colName: "); + sb.append(colName); + sb.append(" colType: "); + sb.append(colType); + sb.append(" countDistincts: "); + sb.append(countDistint); + sb.append(" numNulls: "); + sb.append(numNulls); + sb.append(" avgColLen: "); + sb.append(avgColLen); + sb.append(" numTrues: "); + sb.append(numTrues); + sb.append(" numFalses: "); + sb.append(numFalses); + return sb.toString(); + } + + @Override + public ColStatistics clone() throws CloneNotSupportedException { + ColStatistics clone = new ColStatistics(tabAlias, colName, colType); + clone.setFullyQualifiedColName(fqColName); + clone.setAvgColLen(avgColLen); + clone.setCountDistint(countDistint); + clone.setNumNulls(numNulls); + clone.setNumTrues(numTrues); + clone.setNumFalses(numFalses); + return clone; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java index 36757e8..6c2efaf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java @@ -22,4 +22,6 @@ public interface OperatorDesc extends Serializable, Cloneable { public Object clone() throws CloneNotSupportedException; + public Statistics getStatistics(); + public void 
setStatistics(Statistics statistics); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java new file mode 100644 index 0000000..a16c8ff --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.stats.StatsUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * Statistics. Describes the output of an operator in terms of size, rows, etc + * based on estimates. + */ +@SuppressWarnings("serial") +public class Statistics implements Serializable { + + public enum State { + COMPLETE, PARTIAL, NONE + } + + private long numRows; + private long dataSize; + private State basicStatsState; + private Map columnStats; + private State columnStatsState; + + public Statistics() { + this(0, 0); + } + + public Statistics(long nr, long ds) { + this.numRows = nr; + this.dataSize = ds; + this.basicStatsState = State.NONE; + this.columnStats = null; + this.columnStatsState = State.NONE; + } + + public long getNumRows() { + return numRows; + } + + public void setNumRows(long numRows) { + this.numRows = numRows; + } + + public long getDataSize() { + return dataSize; + } + + public void setDataSize(long dataSize) { + this.dataSize = dataSize; + } + + public State getBasicStatsState() { + return basicStatsState; + } + + public void setBasicStatsState(State basicStatsState) { + this.basicStatsState = basicStatsState; + } + + public State getColumnStatsState() { + return columnStatsState; + } + + public void setColumnStatsState(State columnStatsState) { + this.columnStatsState = columnStatsState; + } + + @Override + @Explain(displayName = "") + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" numRows: "); + sb.append(numRows); + sb.append(" dataSize: "); + sb.append(dataSize); + sb.append(" basicStatsState: "); + sb.append(basicStatsState); + sb.append(" colStatsState: "); + sb.append(columnStatsState); + return sb.toString(); + } + + @Override + public Statistics clone() throws CloneNotSupportedException { + Statistics clone = new Statistics(numRows, dataSize); + clone.setBasicStatsState(basicStatsState); + clone.setColumnStatsState(columnStatsState); + if (columnStats != null) { + Map cloneColStats = Maps.newHashMap(); + for (Map.Entry entry : columnStats.entrySet()) { + cloneColStats.put(entry.getKey(), entry.getValue().clone()); + } + clone.setColumnStats(cloneColStats); + } + return clone; + } + + public void addToNumRows(long nr) { + numRows += nr; + } + + public void 
addToDataSize(long rds) { + dataSize += rds; + } + + public void setColumnStats(Map colStats) { + this.columnStats = colStats; + } + + public void setColumnStats(List colStats) { + columnStats = Maps.newHashMap(); + addToColumnStats(colStats); + } + + public void addToColumnStats(List colStats) { + + if (columnStats == null) { + columnStats = Maps.newHashMap(); + } + + if (colStats != null) { + for (ColStatistics cs : colStats) { + ColStatistics updatedCS = null; + if (cs != null) { + + String key = cs.getFullyQualifiedColName(); + // if column statistics for a column is already found then merge the statistics + if (columnStats.containsKey(key) && columnStats.get(key) != null) { + updatedCS = columnStats.get(key); + updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen())); + updatedCS.setNumNulls(updatedCS.getNumNulls() + cs.getNumNulls()); + updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); + columnStats.put(key, updatedCS); + } else { + columnStats.put(key, cs); + } + } + } + } + } + + // newState + // ----------------------------------------- + // basicStatsState | COMPLETE PARTIAL NONE | + // |________________________________________| + // COMPLETE | COMPLETE PARTIAL PARTIAL | + // PARTIAL | PARTIAL PARTIAL PARTIAL | + // NONE | COMPLETE PARTIAL NONE | + // ----------------------------------------- + public void updateBasicStatsState(State newState) { + if (newState.equals(State.PARTIAL)) { + basicStatsState = State.PARTIAL; + } + + if (newState.equals(State.NONE)) { + if (basicStatsState.equals(State.NONE)) { + basicStatsState = State.NONE; + } else { + basicStatsState = State.PARTIAL; + } + } + + if (newState.equals(State.COMPLETE)) { + if (basicStatsState.equals(State.PARTIAL)) { + basicStatsState = State.PARTIAL; + } else { + basicStatsState = State.COMPLETE; + } + } + } + + // similar to the table above for basic stats + public void updateColumnStatsState(State newState) { + if (newState.equals(State.PARTIAL)) { + columnStatsState = State.PARTIAL; + } + + if (newState.equals(State.NONE)) { + if (columnStatsState.equals(State.NONE)) { + columnStatsState = State.NONE; + } else { + columnStatsState = State.PARTIAL; + } + } + + if (newState.equals(State.COMPLETE)) { + if (columnStatsState.equals(State.PARTIAL)) { + columnStatsState = State.PARTIAL; + } else { + columnStatsState = State.COMPLETE; + } + } + } + + public long getAvgRowSize() { + if (basicStatsState.equals(State.COMPLETE) && numRows != 0) { + return dataSize / numRows; + } + + return 0; + } + + public ColStatistics getColumnStatisticsFromFQColName(String fqColName) { + return columnStats.get(fqColName); + } + + public ColStatistics getColumnStatisticsFromColName(String colName) { + for (ColStatistics cs : columnStats.values()) { + if (cs.getColumnName().equalsIgnoreCase(colName)) { + return cs; + } + } + + return null; + } + + public ColStatistics getColumnStatisticsForColumn(String tabAlias, String colName) { + String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); + return getColumnStatisticsFromFQColName(fqColName); + } + + public List getColumnStats() { + if (columnStats != null) { + return Lists.newArrayList(columnStats.values()); + } + return null; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java new file mode 100644 index 0000000..593d112 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -0,0 +1,1202 @@ +package 
org.apache.hadoop.hive.ql.stats; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.base.Joiner; +import 
com.google.common.collect.Lists; + +public class StatsUtils { + + /** + * Collect table, partition and column level statistics + * + * @param conf + * - hive configuration + * @param partList + * - partition list + * @param table + * - table + * @param tableScanOperator + * - table scan operator + * @return statistics object + */ + public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, + Table table, TableScanOperator tableScanOperator) { + + Statistics stats = new Statistics(); + + // column level statistics are required only for the columns that are needed + List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature(); + List<String> neededColumns = tableScanOperator.getNeededColumns(); + String dbName = table.getDbName(); + String tabName = table.getTableName(); + + if (!table.isPartitioned()) { + long nr = getNumRows(dbName, tabName); + long rds = getRawDataSize(dbName, tabName); + if (rds <= 0) { + rds = getTotalSize(dbName, tabName); + + // if data size is still 0 then get file size + if (rds <= 0) { + rds = getFileSizeForTable(conf, table); + } + } + + // if basic stats are not available then return + if (nr <= 0 && rds <= 0) { + stats.setBasicStatsState(Statistics.State.NONE); + return stats; + } + + // if any basic stat is missing, mark it as partial stats + if (nr <= 0 || rds <= 0) { + stats.setBasicStatsState(Statistics.State.PARTIAL); + } + + // if both are available then we have complete basic stats + if (nr > 0 && rds > 0) { + stats.setBasicStatsState(Statistics.State.COMPLETE); + } + + // number of rows -1 means that statistics from metastore is not reliable + if (nr <= 0) { + nr = 0; + } + stats.setNumRows(nr); + stats.setDataSize(rds); + + List<ColStatistics> colStats = getTableColumnStats(table, schema, neededColumns); + + // if column stats are available and at least one column doesn't have stats + // then mark it as partial + if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) { + stats.setColumnStatsState(Statistics.State.PARTIAL); + } + + // if column stats are available and all columns have stats then mark it + // as complete + if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) { + stats.setColumnStatsState(Statistics.State.COMPLETE); + } + + if (!checkIfColStatsAvailable(colStats)) { + // if there is a column projection and we do not have stats then mark + // it as NONE; else we will have stats for const/udf columns + if (!neededColumns.isEmpty()) { + stats.setColumnStatsState(Statistics.State.NONE); + } else { + stats.setColumnStatsState(Statistics.State.COMPLETE); + } + stats.addToColumnStats(null); + } else { + // set col stats and mark it as table level col stats + stats.addToColumnStats(colStats); + } + } else { + + // For partitioned tables, get the size of all the partitions after pruning + // the partitions that are not required + if (partList != null) { + List<String> partNames = Lists.newArrayList(); + for (Partition part : partList.getNotDeniedPartns()) { + partNames.add(part.getName()); + } + + List<Long> rowCounts = getBasicStatForPartitions(table, partNames, + StatsSetupConst.ROW_COUNT); + List<Long> dataSizes = getBasicStatForPartitions(table, partNames, + StatsSetupConst.RAW_DATA_SIZE); + + long nr = getSumIgnoreNegatives(rowCounts); + long rds = getSumIgnoreNegatives(dataSizes); + if (rds <= 0) { + dataSizes = getBasicStatForPartitions(table, partNames, StatsSetupConst.TOTAL_SIZE); + rds = getSumIgnoreNegatives(dataSizes); + + // if the data size still could not be determined, then fall back to the filesystem to get file + // sizes + if (rds <= 0) { + dataSizes = getFileSizeForPartitions(conf, partList.getNotDeniedPartns()); + } + rds = getSumIgnoreNegatives(dataSizes); + } + + // basic stats + if (nr <= 0 && rds <= 0) { + stats.updateBasicStatsState(Statistics.State.NONE); + } else if (nr <= 0 || rds <= 0) { + stats.updateBasicStatsState(Statistics.State.PARTIAL); + } else { + if (containsNonPositives(rowCounts) || containsNonPositives(dataSizes)) { + stats.updateBasicStatsState(Statistics.State.PARTIAL); + } else { + stats.updateBasicStatsState(Statistics.State.COMPLETE); + } + } + + // number of rows -1 means that statistics from metastore is not reliable + if (nr <= 0) { + nr = 0; + } + stats.addToNumRows(nr); + stats.addToDataSize(rds); + + // column stats + for (Partition part : partList.getNotDeniedPartns()) { + List<ColStatistics> colStats = getPartitionColumnStats(table, part, schema, neededColumns); + if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) { + stats.updateColumnStatsState(Statistics.State.PARTIAL); + } else if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) { + stats.updateColumnStatsState(Statistics.State.COMPLETE); + } else { + // if there is a column projection and we do not have stats then mark + // it as NONE; else we will have stats for const/udf columns + if (!neededColumns.isEmpty()) { + stats.updateColumnStatsState(Statistics.State.NONE); + } else { + stats.updateColumnStatsState(Statistics.State.COMPLETE); + } + } + stats.addToColumnStats(colStats); + } + } + } + + return stats; + + }
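// The basic-stats branching above reduces to a small decision rule; a
// hypothetical condensation for illustration (this helper is not part of
// the patch):
//
//   static Statistics.State basicState(long numRows, long dataSize) {
//     if (numRows <= 0 && dataSize <= 0) { return Statistics.State.NONE; }
//     if (numRows <= 0 || dataSize <= 0) { return Statistics.State.PARTIAL; }
//     return Statistics.State.COMPLETE;
//   }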
+ + /** + * Find the bytes on disk occupied by a table + * + * @param conf + * - hive conf + * @param table + * - table + * @return size on disk + */ + public static long getFileSizeForTable(HiveConf conf, Table table) { + Path path = table.getPath(); + long size = 0; + try { + FileSystem fs = path.getFileSystem(conf); + size = fs.getContentSummary(path).getLength(); + } catch (Exception e) { + size = 0; + } + return size; + } + + /** + * Find the bytes on disk occupied by a list of partitions + * + * @param conf + * - hive conf + * @param parts + * - partition list + * @return sizes of partitions + */ + public static List<Long> getFileSizeForPartitions(HiveConf conf, List<Partition> parts) { + List<Long> sizes = Lists.newArrayList(); + for (Partition part : parts) { + Path path = part.getPartitionPath(); + long size = 0; + try { + FileSystem fs = path.getFileSystem(conf); + size = fs.getContentSummary(path).getLength(); + } catch (Exception e) { + size = 0; + } + sizes.add(size); + } + return sizes; + } + + private static boolean containsNonPositives(List<Long> vals) { + for (Long val : vals) { + if (val <= 0L) { + return true; + } + } + return false; + } + + /** + * Get the sum of all values in the list that are > 0 + * + * @param vals + * - list of values + * @return sum + */ + public static long getSumIgnoreNegatives(List<Long> vals) { + long result = 0; + for (Long l : vals) { + if (l > 0) { + result += l; + } + } + return result; + }
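// Worked example for the two helpers above, assuming per-partition row
// counts of [3, -1, 5] where -1 marks a partition whose stats were never
// gathered:
//
//   List<Long> rowCounts = Lists.newArrayList(3L, -1L, 5L);
//   long nr = getSumIgnoreNegatives(rowCounts);        // 8: the -1 is skipped
//   boolean partial = containsNonPositives(rowCounts); // true -> state PARTIAL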
+ + /** + * Get the partition level column statistics from the metastore for all the needed columns + * + * @param table + * - table object + * @param part + * - partition object + * @param schema + * - output schema + * @param neededColumns + * - list of needed columns + * @return column statistics + */ + public static List<ColStatistics> getPartitionColumnStats(Table table, Partition part, + List<ColumnInfo> schema, List<String> neededColumns) { + + String dbName = table.getDbName(); + String tabName = table.getTableName(); + String partName = part.getName(); + List<ColStatistics> colStatistics = Lists.newArrayList(); + for (ColumnInfo col : schema) { + if (!col.isHiddenVirtualCol()) { + String colName = col.getInternalName(); + if (neededColumns.contains(colName)) { + String tabAlias = col.getTabAlias(); + ColStatistics cs = getPartitionColumnStatsForColumn(dbName, tabName, partName, colName); + if (cs != null) { + cs.setTableAlias(tabAlias); + } + colStatistics.add(cs); + } + } + } + return colStatistics; + } + + /** + * Get the partition level column statistics from the metastore for a specific column + * + * @param dbName + * - database name + * @param tabName + * - table name + * @param partName + * - partition name + * @param colName + * - column name + * @return column statistics + */ + public static ColStatistics getPartitionColumnStatsForColumn(String dbName, String tabName, + String partName, String colName) { + try { + ColumnStatistics colStats = Hive.get().getPartitionColumnStatistics(dbName, tabName, + partName, colName); + if (colStats != null) { + return getColStatistics(colStats.getStatsObj().get(0), tabName, colName); + } + } catch (HiveException e) { + return null; + } + return null; + } + + /** + * Returns true if column statistics are available for at least one column + * + * @param colStats + * - column stats + * @return + */ + private static boolean checkIfColStatsAvailable(List<ColStatistics> colStats) { + for (ColStatistics cs : colStats) { + if (cs != null) { + return true; + } + } + return false; + } + + /** + * Get table level column stats for the specified column + * + * @param dbName + * - database name + * @param tableName + * - table name + * @param colName + * - column name + * @return column stats + */ + public static ColStatistics getTableColumnStatsForColumn(String dbName, String tableName, + String colName) { + try { + ColumnStatistics colStat = Hive.get().getTableColumnStatistics(dbName, tableName, colName); + if (colStat != null) { + // there will be only one column statistics object + return getColStatistics(colStat.getStatsObj().get(0), tableName, colName); + } + } catch (HiveException e) { + return null; + } + return null; + }
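// Both the table- and partition-level variants follow the same per-column
// fetch pattern; a sketch of how a caller might assemble needed-column
// stats (the database, table and column names here are hypothetical):
//
//   List<ColStatistics> colStats = Lists.newArrayList();
//   for (String col : Lists.newArrayList("state", "locid")) {
//     colStats.add(getTableColumnStatsForColumn("default", "loc_orc", col));
//   }
//   // entries are null for columns with no metastore statistics, which is
//   // exactly what the null checks in collectStatistics() look for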
+ + /** + * Convert ColumnStatisticsObj to ColStatistics + * + * @param cso + * - ColumnStatisticsObj + * @param tabName + * - table name + * @param colName + * - column name + * @return ColStatistics + */ + public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName, + String colName) { + ColStatistics cs = new ColStatistics(tabName, colName, cso.getColType()); + String colType = cso.getColType(); + ColumnStatisticsData csd = cso.getStatsData(); + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + cs.setCountDistint(csd.getStringStats().getNumDVs()); + cs.setNumNulls(csd.getStringStats().getNumNulls()); + cs.setAvgColLen(csd.getStringStats().getAvgColLen()); + } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) { + cs.setCountDistint(2); + } else { + cs.setCountDistint(1); + } + cs.setNumTrues(csd.getBooleanStats().getNumTrues()); + cs.setNumFalses(csd.getBooleanStats().getNumFalses()); + cs.setNumNulls(csd.getBooleanStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + cs.setAvgColLen(csd.getBinaryStats().getAvgColLen()); + cs.setNumNulls(csd.getBinaryStats().getNumNulls()); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); + } else { + // column statistics for complex data types are not supported yet + return null; + } + return cs; + }
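// Worked example for the conversion above, assuming a metastore entry for
// a hypothetical int column "locid" with 7 distinct values and 1 null
// (thrift objects built with their generated setters):
//
//   LongColumnStatsData lcs = new LongColumnStatsData();
//   lcs.setNumNulls(1L);
//   lcs.setNumDVs(7L);
//   ColumnStatisticsData data = new ColumnStatisticsData();
//   data.setLongStats(lcs);
//   ColumnStatisticsObj cso = new ColumnStatisticsObj();
//   cso.setColName("locid");
//   cso.setColType("int");
//   cso.setStatsData(data);
//   ColStatistics cs = getColStatistics(cso, "loc_orc", "locid");
//   // cs.getCountDistint() == 7, cs.getNumNulls() == 1,
//   // cs.getAvgColLen() == JavaDataModel.get().primitive1()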
+ + /** + * Get table level column statistics from the metastore for needed columns + * + * @param table + * - table + * @param schema + * - output schema + * @param neededColumns + * - list of needed columns + * @return column statistics + */ + public static List<ColStatistics> getTableColumnStats(Table table, List<ColumnInfo> schema, + List<String> neededColumns) { + + String dbName = table.getDbName(); + String tabName = table.getTableName(); + List<ColStatistics> colStatistics = Lists.newArrayList(); + for (ColumnInfo col : schema) { + if (!col.isHiddenVirtualCol()) { + String colName = col.getInternalName(); + if (neededColumns.contains(colName)) { + String tabAlias = col.getTabAlias(); + ColStatistics cs = getTableColumnStatsForColumn(dbName, tabName, colName); + if (cs != null) { + cs.setTableAlias(tabAlias); + } + colStatistics.add(cs); + } + } + } + return colStatistics; + } + + /** + * Get the raw data size of variable length data types + * + * @param conf + * - hive conf + * @param oi + * - object inspector + * @param colType + * - column type + * @return raw data size + */ + public static long getAvgColLenOfVariableLengthTypes(HiveConf conf, ObjectInspector oi, + String colType) { + + long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH); + + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + + // constant string projection Ex: select "hello" from table + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + + // if the writable constant is null then return size 0 + if (coi.getWritableConstantValue() == null) { + return 0; + } + + return coi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableConstantStringObjectInspector) { + + // some UDFs return writable constant strings (fixed width) + // Ex: select upper("hello") from table + WritableConstantStringObjectInspector wcsoi = (WritableConstantStringObjectInspector) oi; + + return wcsoi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableStringObjectInspector) { + + // some UDFs may emit strings of variable length, like pattern-matching + // UDFs; it is hard to determine the output length of such UDFs. 
+ // return the variable length from config + return configVarLen; + } + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + + // constant byte arrays + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + + // if the writable constant is null then return size 0 + if (coi.getWritableConstantValue() == null) { + return 0; + } + + BytesWritable bw = ((BytesWritable) coi.getWritableConstantValue()); + return bw.getLength(); + } else if (oi instanceof WritableConstantBinaryObjectInspector) { + + // writable constant byte arrays + WritableConstantBinaryObjectInspector wcboi = (WritableConstantBinaryObjectInspector) oi; + + return wcboi.getWritableConstantValue().getLength(); + } else if (oi instanceof WritableBinaryObjectInspector) { + + // return the variable length from config + return configVarLen; + } + } else { + + // complex types (map, list, struct, union) + return getSizeOfComplexTypes(conf, oi); + } + + return 0; + } + + /** + * Get the size of complex data types + * + * @param conf + * - hive conf + * @param oi + * - object inspector + * @return raw data size + */ + public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) { + long result = 0; + int length = 0; + int listEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_LIST_NUM_ENTRIES); + int mapEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_NUM_ENTRIES); + + switch (oi.getCategory()) { + case PRIMITIVE: + String colType = oi.getTypeName(); + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType); + result += JavaDataModel.get().lengthForStringOfLength(avgColLen); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType); + result += JavaDataModel.get().lengthForByteArrayOfSize(avgColLen); + } else { + result += getAvgColLenOfFixedLengthTypes(colType); + } + break; + case LIST: + if (oi instanceof StandardConstantListObjectInspector) { + + // constant list projection of known length + StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi; + length = scloi.getWritableConstantValue().size(); + + // check if list elements are primitives or Objects + ObjectInspector leoi = scloi.getListElementObjectInspector(); + if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { + result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length); + } else { + result += JavaDataModel.get().lengthForObjectArrayOfSize(length); + } + } else { + StandardListObjectInspector sloi = (StandardListObjectInspector) oi; + + // list overhead + (configured number of elements in list * size of element) + long elemSize = getSizeOfComplexTypes(conf, sloi.getListElementObjectInspector()); + result += JavaDataModel.get().arrayList() + (listEntries * elemSize); + } + break; + case MAP: + if (oi instanceof StandardConstantMapObjectInspector) { + + // constant map projection of known length + StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi; + result += getSizeOfMap(scmoi); + } else { + StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi; + result += getSizeOfComplexTypes(conf, smoi.getMapKeyObjectInspector()); + result += getSizeOfComplexTypes(conf, smoi.getMapValueObjectInspector()); + + // hash map overhead + result += JavaDataModel.get().hashMap(mapEntries); + }
+ break; + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + + // add constant object overhead for struct + result += JavaDataModel.get().object(); + + // add constant overhead for struct field name references + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + for (StructField field : soi.getAllStructFieldRefs()) { + result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector()); + } + break; + case UNION: + UnionObjectInspector uoi = (UnionObjectInspector) oi; + + // add constant object overhead for union + result += JavaDataModel.get().object(); + + // add constant size for union tags + result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1(); + for (ObjectInspector foi : uoi.getObjectInspectors()) { + result += getSizeOfComplexTypes(conf, foi); + } + break; + default: + break; + } + + return result; + } + + /** + * Get size of fixed length primitives + * + * @param colType + * - column type + * @return raw data size + */ + public static long getAvgColLenOfFixedLengthTypes(String colType) { + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + return JavaDataModel.get().primitive1(); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + return JavaDataModel.get().primitive2(); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + return JavaDataModel.get().lengthOfTimestamp(); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + return JavaDataModel.get().lengthOfDate(); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + return JavaDataModel.get().lengthOfDecimal(); + } else { + return 0; + } + } + + /** + * Get the size of arrays of primitive types + * + * @param colType + * - column type + * @param length + * - array length + * @return raw data size + */ + public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) { + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + return JavaDataModel.get().lengthForIntArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + return JavaDataModel.get().lengthForDoubleArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + return JavaDataModel.get().lengthForLongArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + return JavaDataModel.get().lengthForByteArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + return JavaDataModel.get().lengthForBooleanArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + return JavaDataModel.get().lengthForTimestampArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + return JavaDataModel.get().lengthForDateArrayOfSize(length); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + return JavaDataModel.get().lengthForDecimalArrayOfSize(length); + } else { + return 0; + } + }
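// Worked example of the complex-type sizing above, assuming the list-entry
// knob is left at its default of 10: a non-constant list<int> is costed as
// arrayList() + 10 * primitive1(), i.e. the list overhead plus ten assumed
// elements; a constant list of known length uses the exact element count
// instead.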
+ + /** + * Estimate the size of a map object + * + * @param scmoi + * - object inspector + * @return size of map + */ + public static long getSizeOfMap(StandardConstantMapObjectInspector scmoi) { + Map<?, ?> map = scmoi.getWritableConstantValue(); + ObjectInspector koi = scmoi.getMapKeyObjectInspector(); + ObjectInspector voi = scmoi.getMapValueObjectInspector(); + long result = 0; + for (Map.Entry<?, ?> entry : map.entrySet()) { + result += getWritableSize(koi, entry.getKey()); + result += getWritableSize(voi, entry.getValue()); + } + + // add additional overhead for each map entry + result += JavaDataModel.get().hashMap(map.entrySet().size()); + return result; + } + + /** + * Get size of primitive data types based on their respective writable object inspector + * + * @param oi + * - object inspector + * @param value + * - value + * @return raw data size + */ + public static long getWritableSize(ObjectInspector oi, Object value) { + if (oi instanceof WritableStringObjectInspector) { + WritableStringObjectInspector woi = (WritableStringObjectInspector) oi; + return JavaDataModel.get().lengthForStringOfLength( + woi.getPrimitiveWritableObject(value).getLength()); + } else if (oi instanceof WritableBinaryObjectInspector) { + WritableBinaryObjectInspector woi = (WritableBinaryObjectInspector) oi; + return JavaDataModel.get().lengthForByteArrayOfSize( + woi.getPrimitiveWritableObject(value).getLength()); + } else if (oi instanceof WritableBooleanObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableByteObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableDateObjectInspector) { + return JavaDataModel.get().lengthOfDate(); + } else if (oi instanceof WritableDoubleObjectInspector) { + return JavaDataModel.get().primitive2(); + } else if (oi instanceof WritableFloatObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableHiveDecimalObjectInspector) { + return JavaDataModel.get().lengthOfDecimal(); + } else if (oi instanceof WritableIntObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableLongObjectInspector) { + return JavaDataModel.get().primitive2(); + } else if (oi instanceof WritableShortObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableTimestampObjectInspector) { + return JavaDataModel.get().lengthOfTimestamp(); + } + + return 0; + }
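// Worked example for getSizeOfMap(), assuming a constant map with two
// string->string entries; the factory call below is assumed to be the
// serde2 ObjectInspectorFactory helper for constant maps, and the byte
// counts depend on the JavaDataModel in use:
//
//   Map<Object, Object> m = new HashMap<Object, Object>();
//   m.put(new Text("k1"), new Text("v1"));
//   m.put(new Text("k2"), new Text("v2"));
//   StandardConstantMapObjectInspector scmoi =
//       ObjectInspectorFactory.getStandardConstantMapObjectInspector(
//           PrimitiveObjectInspectorFactory.writableStringObjectInspector,
//           PrimitiveObjectInspectorFactory.writableStringObjectInspector, m);
//   long size = getSizeOfMap(scmoi);
//   // = 4 * lengthForStringOfLength(2) + hashMap(2)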
+ + /** + * Get column statistics from parent statistics. + * + * @param conf + * - hive conf + * @param parentStats + * - parent statistics + * @param colExprMap + * - column expression map + * @param rowSchema + * - row schema + * @return column statistics + */ + public static List<ColStatistics> getColStatisticsFromExprMap(HiveConf conf, + Statistics parentStats, + Map<String, ExprNodeDesc> colExprMap, RowSchema rowSchema) { + List<ColStatistics> cs = Lists.newArrayList(); + for (ColumnInfo ci : rowSchema.getSignature()) { + String outColName = ci.getInternalName(); + String outTabAlias = ci.getTabAlias(); + ExprNodeDesc end = colExprMap.get(outColName); + if (end == null) { + outColName = StatsUtils.stripPrefixFromColumnName(outColName); + end = colExprMap.get(outColName); + } + ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end); + if (colStat != null) { + outColName = StatsUtils.stripPrefixFromColumnName(outColName); + colStat.setColumnName(outColName); + colStat.setTableAlias(outTabAlias); + } + cs.add(colStat); + } + return cs; + } + + /** + * Get column statistics for an expression node + * + * @param conf + * - hive conf + * @param parentStats + * - parent statistics + * @param end + * - expression node + * @return column statistics + */ + public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, + ExprNodeDesc end) { + + if (end == null) { + return null; + } + + String colName = null; + String colType = null; + double avgColSize = 0; + long countDistincts = 0; + long numNulls = 0; + ObjectInspector oi = null; + long numRows = parentStats.getNumRows(); + String tabAlias = null; + + if (end instanceof ExprNodeColumnDesc) { + // column projection + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + colName = encd.getColumn(); + tabAlias = encd.getTabAlias(); + colName = stripPrefixFromColumnName(colName); + + if (encd.getIsPartitionColOrVirtualCol()) { + + // partition or virtual columns + colType = encd.getTypeInfo().getTypeName(); + countDistincts = numRows; + oi = encd.getWritableObjectInspector(); + } else { + + // clone the column stats and return + ColStatistics result = parentStats.getColumnStatisticsForColumn(tabAlias, colName); + if (result != null) { + try { + return result.clone(); + } catch (CloneNotSupportedException e) { + return null; + } + } + return null; + } + } else if (end instanceof ExprNodeConstantDesc) { + + // constant projection + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; + + // null projection + if (encd.getValue() == null) { + colName = encd.getName(); + colType = "null"; + numNulls = numRows; + } else { + colName = encd.getName(); + colType = encd.getTypeString(); + countDistincts = 1; + oi = encd.getWritableObjectInspector(); + } + } else if (end instanceof ExprNodeGenericFuncDesc) { + + // udf projection + ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; + colName = engfd.getName(); + colType = engfd.getTypeString(); + countDistincts = numRows; + oi = engfd.getWritableObjectInspector(); + } else if (end instanceof ExprNodeNullDesc) { + + // null projection + ExprNodeNullDesc ennd = (ExprNodeNullDesc) end; + colName = ennd.getName(); + colType = "null"; + numNulls = numRows; + } + + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME) + || colType.startsWith(serdeConstants.LIST_TYPE_NAME) + || colType.startsWith(serdeConstants.MAP_TYPE_NAME) + || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME) + || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) { + avgColSize = getAvgColLenOfVariableLengthTypes(conf, oi, colType); + } else { + avgColSize = getAvgColLenOfFixedLengthTypes(colType); + } + + ColStatistics colStats = new ColStatistics(tabAlias, colName, colType); + colStats.setAvgColLen(avgColSize); + colStats.setCountDistint(countDistincts); + colStats.setNumNulls(numNulls); + + return colStats; + }
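// Example of the constant-projection path above, e.g. for "select 11 from t"
// (the conf and parent statistics are assumed; values are illustrative):
//
//   HiveConf conf = new HiveConf();
//   Statistics parentStats = new Statistics(2, 100);
//   ExprNodeConstantDesc eleven = new ExprNodeConstantDesc(11);
//   ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, eleven);
//   // countDistinct == 1, numNulls == 0, and since "int" is fixed length,
//   // avgColLen == JavaDataModel.get().primitive1()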
+ + /** + * Get the number of rows of a given table + * + * @param dbName + * - database name + * @param tabName + * - table name + * @return number of rows + */ + public static long getNumRows(String dbName, String tabName) { + return getBasicStatForTable(dbName, tabName, StatsSetupConst.ROW_COUNT); + } + + /** + * Get the raw data size of a given table + * + * @param dbName + * - database name + * @param tabName + * - table name + * @return raw data size + */ + public static long getRawDataSize(String dbName, String tabName) { + return getBasicStatForTable(dbName, tabName, StatsSetupConst.RAW_DATA_SIZE); + } + + /** + * Get the total size of a given table + * + * @param dbName + * - database name + * @param tabName + * - table name + * @return total size + */ + public static long getTotalSize(String dbName, String tabName) { + return getBasicStatForTable(dbName, tabName, StatsSetupConst.TOTAL_SIZE); + } + + /** + * Get basic stats of a table + * + * @param dbName + * - database name + * @param tabName + * - table name + * @param statType + * - type of stats + * @return value of stats + */ + public static long getBasicStatForTable(String dbName, String tabName, String statType) { + + Table table; + try { + table = Hive.get().getTable(dbName, tabName); + } catch (HiveException e) { + return 0; + } + + Map<String, String> params = table.getParameters(); + long result = 0; + + if (params != null) { + try { + result = Long.parseLong(params.get(statType)); + } catch (NumberFormatException e) { + result = 0; + } + } + return result; + } + + /** + * Get basic stats of partitions + * + * @param table + * - table + * @param partNames + * - partition names + * @param statType + * - type of stats + * @return value of stats + */ + public static List<Long> getBasicStatForPartitions(Table table, List<String> partNames, + String statType) { + + List<Long> stats = Lists.newArrayList(); + List<Partition> parts; + try { + parts = Hive.get().getPartitionsByNames(table, partNames); + } catch (HiveException e1) { + return stats; + } + + for (Partition part : parts) { + Map<String, String> params = part.getParameters(); + long result = 0; + if (params != null) { + try { + result = Long.parseLong(params.get(statType)); + } catch (NumberFormatException e) { + result = 0; + } + stats.add(result); + } + } + return stats; + }
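// Usage sketch: the wrappers above just read table/partition parameters
// that Hive's stats tasks populate (table name here is hypothetical):
//
//   long nr = getNumRows("default", "loc_orc");      // StatsSetupConst.ROW_COUNT
//   long rds = getRawDataSize("default", "loc_orc"); // StatsSetupConst.RAW_DATA_SIZE
//   // a -1 stored in the metastore means "unknown"; callers such as
//   // collectStatistics() normalize it away with their <= 0 checks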
+ + /** + * Compute raw data size from column statistics + * + * @param numRows + * - number of rows + * @param colStats + * - column statistics + * @return raw data size + */ + public static long getDataSizeFromColumnStats(long numRows, List<ColStatistics> colStats) { + long result = 0; + + if (numRows <= 0) { + return result; + } + + for (ColStatistics cs : colStats) { + if (cs != null) { + String colType = cs.getColumnType(); + long nonNullCount = numRows - cs.getNumNulls(); + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + + result += nonNullCount * cs.getAvgColLen(); + } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + + int acl = (int) Math.round(cs.getAvgColLen()); + result += nonNullCount * JavaDataModel.get().lengthForStringOfLength(acl); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + + int acl = (int) Math.round(cs.getAvgColLen()); + result += nonNullCount * JavaDataModel.get().lengthForByteArrayOfSize(acl); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + + result += nonNullCount * JavaDataModel.get().lengthOfTimestamp(); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + + result += nonNullCount * JavaDataModel.get().lengthOfDecimal(); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + + result += nonNullCount * JavaDataModel.get().lengthOfDate(); + } else { + + result += nonNullCount * cs.getAvgColLen(); + } + } + } + + return result; + } + + /** + * Remove the KEY/VALUE prefix from a column name + * + * @param colName + * - column name + * @return column name + */ + public static String stripPrefixFromColumnName(String colName) { + String strippedName = colName; + if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) { + // strip off KEY./VALUE. from the column name + strippedName = colName.split("\\.")[1]; + } + return strippedName; + }
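// Examples for the prefix stripping above (reduce-sink output names):
//
//   stripPrefixFromColumnName("KEY._col0")   // "_col0"
//   stripPrefixFromColumnName("VALUE._col1") // "_col1"
//   stripPrefixFromColumnName("state")       // unchanged: "state"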
+ + /** + * Returns the fully qualified name of a column + * + * @param tabName + * @param colName + * @return + */ + public static String getFullyQualifiedColumnName(String tabName, String colName) { + return getFullyQualifiedName(null, tabName, colName); + } + + /** + * Returns the fully qualified name of a column + * + * @param dbName + * @param tabName + * @param colName + * @return + */ + public static String getFullyQualifiedColumnName(String dbName, String tabName, String colName) { + return getFullyQualifiedName(dbName, tabName, colName); + } + + /** + * Returns the fully qualified name of a column + * + * @param dbName + * @param tabName + * @param partName + * @param colName + * @return + */ + public static String getFullyQualifiedColumnName(String dbName, String tabName, String partName, + String colName) { + return getFullyQualifiedName(dbName, tabName, partName, colName); + } + + private static String getFullyQualifiedName(String... names) { + List<String> nonNullAndEmptyNames = Lists.newArrayList(); + for (String name : names) { + if (name != null && !name.isEmpty()) { + nonNullAndEmptyNames.add(name); + } + } + return Joiner.on(".").join(nonNullAndEmptyNames); + } + + /** + * Try to get fully qualified column names from expression nodes + * + * @param keyExprs + * - expression nodes + * @param map + * - column expression map + * @return list of fully qualified names + */ + public static List<String> getFullQualifedColNameFromExprs(List<ExprNodeDesc> keyExprs, + Map<String, ExprNodeDesc> map) { + List<String> result = Lists.newArrayList(); + if (keyExprs != null) { + for (ExprNodeDesc end : keyExprs) { + String outColName = null; + for (Map.Entry<String, ExprNodeDesc> entry : map.entrySet()) { + if (entry.getValue().isSame(end)) { + outColName = entry.getKey(); + } + } + if (end instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + if (outColName == null) { + outColName = encd.getColumn(); + } + String tabAlias = encd.getTabAlias(); + outColName = stripPrefixFromColumnName(outColName); + result.add(getFullyQualifiedColumnName(tabAlias, outColName)); + } else if (end instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end; + List<String> cols = getFullQualifedColNameFromExprs(enf.getChildren(), map); + String joinedStr = Joiner.on(".").skipNulls().join(cols); + result.add(joinedStr); + } else if (end instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; + result.add(encd.getValue().toString()); + } + } + } + return result; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java index 9c3c4c0..3352a08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java +++ ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java @@ -267,6 +267,15 @@ public int lengthForIntArrayOfSize(int length) { public int lengthForBooleanArrayOfSize(int length) { return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length); } + public int lengthForTimestampArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length); + } + public int lengthForDateArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDate(), length); + } + public int lengthForDecimalArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length); + } public int lengthOfDecimal() { // object overhead + 8 bytes for intCompact + 4 bytes for precision diff --git ql/src/test/queries/clientpositive/annotate_stats_filter.q ql/src/test/queries/clientpositive/annotate_stats_filter.q new file mode 100644 index 0000000..0a645a6 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_filter.q @@ -0,0 +1,74 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH'; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- the state column has 5 distinct values; the estimate is numRows/countDistinct +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where state='OH';
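-- worked example of the equality rule above: the estimator divides the
-- parent row count by the column's distinct-value count, so 8/5 = 1 row
-- (integer division), and rawDataSize follows the row estimate (1 row x ~102 bytes)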
+ +-- a not-equals comparison shouldn't affect the number of rows. rawDataSize is 792 and not 796 because of a rounding issue with avgColLen (avgColLen uses integers, not doubles). +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where state!='OH'; +explain extended select * from loc_orc where state<>'OH'; + +-- null comparisons are treated as constant equality comparisons +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where zip is null; +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where !(zip is not null); + +-- not-null checks are treated as the inverse of null checks +-- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where zip is not null; +-- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where !(zip is null); + +-- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where !false; +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true; + +-- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where state='OH' or state='CA'; + +-- AND evaluation. rules are applied in cascade: 8/2 = 4, then 4/2 = 2 +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where year=2001 and year is null; +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL'; + +-- AND and OR together. the left expr will yield 2 rows and the right will yield 1 row +-- numRows: 3 rawDataSize: 306 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA'); + +-- AND and OR together. the left expr will yield 8 rows and the right will yield 1 row +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
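-- worked example of combining rules: OR adds the estimates of its branches
-- (1 row for 'OH' + 1 row for 'CA' = 2 above), while AND applies each
-- conjunct's selectivity in sequence (8 -> 4 -> 2 for the year predicates)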
+ +-- for all inequality conditions the rule is rows/3 +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where locid < 30; +explain extended select * from loc_orc where locid > 30; +explain extended select * from loc_orc where locid <= 30; +explain extended select * from loc_orc where locid >= 30; diff --git ql/src/test/queries/clientpositive/annotate_stats_groupby.q ql/src/test/queries/clientpositive/annotate_stats_groupby.q new file mode 100644 index 0000000..ce29d52 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_groupby.q @@ -0,0 +1,55 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- only one distinct value in the year column + 1 NULL value +-- map-side GBY: numRows: 8 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 +explain extended select year from loc_orc group by year; + +-- map-side GBY: numRows: 8 +-- reduce-side GBY: numRows: 4 +explain extended select state,locid from loc_orc group by state,locid; + +-- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid with cube; + +-- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid with rollup; + +-- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)); + +-- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)); + +-- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()); + +-- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()); + +set hive.stats.map.parallelism=10; + +-- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain extended select year from loc_orc group by year; + +-- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6 and 7 resp. numRows = min(320/2, 6*7) +explain extended select state,locid from loc_orc group by state,locid with cube; + 
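-- worked example of the parallelism knob: with hive.stats.map.parallelism=10
-- the 8 input rows are presumably surfaced on each of 10 mappers, hence the
-- map-side estimate of 8*10 = 80; the reduce side stays bounded by the key
-- NDV, e.g. min(80/2, 2) = 2 for the single-key group by above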
diff --git ql/src/test/queries/clientpositive/annotate_stats_join.q ql/src/test/queries/clientpositive/annotate_stats_join.q new file mode 100644 index 0000000..5683498 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_join.q @@ -0,0 +1,80 @@ +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists dept_staging ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +create table if not exists dept_orc like dept_staging; +alter table dept_orc set fileformat orc; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; +LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging; +LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; + + +insert overwrite table emp_orc select * from emp_staging; +insert overwrite table dept_orc select * from dept_staging; +insert overwrite table loc_orc select * from loc_staging; + +analyze table emp_orc compute statistics for columns lastname,deptid; +analyze table dept_orc compute statistics for columns deptname,deptid; +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- number of rows +-- emp_orc - 6 +-- dept_orc - 4 +-- loc_orc - 8 + +-- count distincts for the relevant columns (count-distinct values are approximate, so in some cases they will be greater than the number of rows) +-- emp_orc.deptid - 3 +-- emp_orc.lastname - 7 +-- dept_orc.deptid - 6 +-- dept_orc.deptname - 5 +-- loc_orc.locid - 6 +-- loc_orc.state - 7 + +-- Expected output rows: 4 +-- Reason: #rows = (6*4)/max(3,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); + +-- 3 way join +-- Expected output rows: 4 +-- Reason: #rows = (6*4*6)/max(3,6)*max(6,3) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid); + +-- Expected output rows: 5 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid); + +-- join keys of different types +-- Expected output rows: 4 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state); + +-- multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4)/max(3,6)*max(7,5) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname); + +-- 3 way and multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state); + diff --git ql/src/test/queries/clientpositive/annotate_stats_limit.q 
ql/src/test/queries/clientpositive/annotate_stats_limit.q new file mode 100644 index 0000000..e739326 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_limit.q @@ -0,0 +1,28 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state, locid, zip, year; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4; + +-- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16; + +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0; diff --git ql/src/test/queries/clientpositive/annotate_stats_part.q ql/src/test/queries/clientpositive/annotate_stats_part.q new file mode 100644 index 0000000..05f3a19 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_part.q @@ -0,0 +1,78 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; + +create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc; + +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table loc_orc partition(year) select * from loc_staging; + +-- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc; + +-- partition level analyze statistics for a specific partition +analyze table loc_orc partition(year=2001) compute statistics; + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001; + +-- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__'; + +-- both partitions will be pruned +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__'; + +-- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select zip from loc_orc; + +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc; + +-- column statistics for __HIVE_DEFAULT_PARTITION__ are not supported yet; hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc; + +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001; + +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from loc_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_select.q ql/src/test/queries/clientpositive/annotate_stats_select.q new file mode 100644 index 0000000..1986a80 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_select.q @@ -0,0 +1,138 @@ +create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map<string,string>, + l1 array<bigint>, + st1 struct<c1:int,c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile; + +create table alltypes_orc like alltypes; +alter table alltypes_orc set fileformat orc; + +load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes; + +insert overwrite table alltypes_orc select * from alltypes; + +-- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc; + +-- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1; + +-- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc;
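-- worked example of a column-projection estimate: for "select bo1" the data
-- size is numRows * fixed size of a boolean = 2 * 4 bytes = 8, matching the
-- expected rawDataSize above (boolean is costed as JavaDataModel primitive1)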
+ +-- column alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 174 +explain extended select s1 from alltypes_orc; + +-- column statistics for complex types are not supported yet, so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 246 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc; + +-- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc; + +-- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc; + +-- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc; + +-- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc; + +-- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc; + +-- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc; + +-- COUNT(*) is projected as a new column; it is not projected as a GenericUDF, so the data size estimate is based on the number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(*) from alltypes_orc; + +-- COUNT(1) is projected as a new column; it is not projected as a GenericUDF, so the data size estimate is based on the number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(1) from alltypes_orc; + +-- column statistics for complex column types will be missing. 
data size will be calculated from available column statistics +-- numRows: 2 rawDataSize: 254 +explain extended select *,11 from alltypes_orc; + +-- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2; + +-- This test is for FILTER operator where filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc where bo1; + +-- numRows: 0 rawDataSize: 0 +explain extended select bo1 from alltypes_orc where !bo1; diff --git ql/src/test/queries/clientpositive/annotate_stats_table.q ql/src/test/queries/clientpositive/annotate_stats_table.q new file mode 100644 index 0000000..1c7d163 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_table.q @@ -0,0 +1,52 @@ +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +-- basicStatState: NONE colStatState: NONE +explain extended select * from emp_orc; + +LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; + +set hive.stats.autogather=false; + +insert overwrite table emp_orc select * from emp_staging; + +-- stats are disabled. basic stats will report the file size but not raw data size. 
diff --git ql/src/test/queries/clientpositive/annotate_stats_union.q ql/src/test/queries/clientpositive/annotate_stats_union.q new file mode 100644 index 0000000..726b048 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_union.q @@ -0,0 +1,53 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 8 rawDataSize: 688 +explain extended select state from loc_orc; + +-- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp; + +create database test; +use test; +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_staging compute statistics; +analyze table loc_staging compute statistics for columns state,locid,zip,year; +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp; + +-- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
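The union tests above need no new machinery: the statistics of a union are the sums of its branches (8 + 8 rows, 688 + 688 bytes). The filter golden file that follows exercises selectivity rules instead: an equality predicate (and IS NULL, which is treated the same way) keeps roughly numRows/NDV rows, IS NOT NULL keeps the complement, OR sums its branch estimates, AND applies its conjuncts in cascade, and NOT over a constant keeps either all rows or none. The sketch below reproduces that arithmetic under those assumptions; it is illustrative only, not the patch's implementation.

// Illustrative sketch of filter selectivity estimation.
public class FilterSelectivitySketch {

  // Equality (and IS NULL, treated as a constant equality comparison):
  // numRows / countDistinct, kept at a minimum of one row.
  static long equalityRows(long numRows, long countDistinct) {
    return Math.max(1, numRows / Math.max(1, countDistinct));
  }

  // IS NOT NULL: the inverse of IS NULL.
  static long isNotNullRows(long numRows, long countDistinct) {
    return numRows - equalityRows(numRows, countDistinct);
  }

  // OR: branches are estimated independently and summed.
  static long orRows(long left, long right) { return left + right; }

  // NOT over a constant: !false passes every row, !true passes none.
  static long notConstantRows(long numRows, boolean constant) {
    return constant ? 0 : numRows;
  }

  public static void main(String[] args) {
    long rows = 8, stateNdv = 5;
    long oh = equalityRows(rows, stateNdv);       // state='OH' -> 8/5 = 1 row
    System.out.println(oh);                       // 1
    System.out.println(orRows(oh, oh));           // state='OH' or state='CA' -> 2
    // AND applied in cascade, as in the "8/2 = 4/2 = 2" comment below:
    System.out.println(equalityRows(equalityRows(rows, 2), 2)); // 2
    System.out.println(isNotNullRows(rows, 8));   // zip is not null -> 7
  }
}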
diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out new file mode 100644 index 0000000..7c1ebd3 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -0,0 +1,2470 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- column stats 
are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns 
state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- a not-equals comparison shouldn't affect the number of rows. rawDataSize is 804 and not 796 because of a rounding issue with avgColLen (avgColLen uses integers, not doubles). +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where state!='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- a not-equals comparison shouldn't affect the number of rows. rawDataSize is 804 and not 796 because of a rounding issue with avgColLen (avgColLen uses integers, not doubles).
+-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where state!='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct 
loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where state<>'OH' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where state<>'OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<> (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was 
here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where zip is null +PREHOOK: type: QUERY +POSTHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where zip is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is null + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked 
pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where !(zip is not null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where !(zip is not null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
(TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is not null) + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where zip is not null +PREHOOK: type: QUERY +POSTHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where zip is not null +POSTHOOK: type: QUERY 
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is not null + type: boolean + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where !(zip is null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where !(zip is null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is null) + type: boolean + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### 
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where !false +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where !false +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
false)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not false) + type: boolean + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! true)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not true) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> 
Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where state='OH' or state='CA' +PREHOOK: type: QUERY +POSTHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where state='OH' or state='CA' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL state) 'OH') (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((state = 'OH') or (state = 'CA')) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + 
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where year=2001 and year is null +PREHOOK: type: QUERY +POSTHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where year=2001 and year is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and year is null) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### 
+ Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL state) 'OH')) (= (TOK_TABLE_OR_COL state) 'FL'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and (state = 'OH')) and (state = 'FL')) + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 2 rows and right will yield 1 row +-- numRows: 3 rawDataSize: 306 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together.
left expr will yield 2 rows and right will yield 1 row +-- numRows: 3 rawDataSize: 306 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and year is null) or (state = 'CA')) + type: boolean + Statistics: + numRows: 3 dataSize: 306 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 3 dataSize: 306 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 3 dataSize: 306 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + 
columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (or (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) or year is null) and (state = 'CA')) + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here 
#### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- for all inequality conditions, rows/3 is the rule +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where locid < 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- for all inequality conditions, rows/3 is the rule +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where locid < 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid < 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was
here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid > 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid > 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid > 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string 
+ expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid <= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid <= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE 
PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid <= 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid >= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid >= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (>= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid >= 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff 
--git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out new file mode 100644 index 0000000..ee8e557 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -0,0 +1,1750 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + 
expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side GBY: numRows: 8 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 +explain extended select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side GBY: numRows: 8 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 +explain extended select year from loc_orc group by year +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: year + type: int + outputColumnNames: year + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: year + type: int + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 28 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + Statistics: + numRows: 8 dataSize: 28 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY: numRows: 8 +-- reduce-side GBY: numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY: numRows: 8 +-- reduce-side GBY: numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPBY (TOK_TABLE_OR_COL state) 
(TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + mode: hash + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 2800 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid with rollup +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + 
outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 12 dataSize: 2100 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + 
columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 4 dataSize: 700 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + 
outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 1400 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE 
true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 12 dataSize: 2100 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + 
Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 2800 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ 
+ hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain extended select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain extended select year from loc_orc group by year +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: year + type: int + outputColumnNames: year + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: year + type: int + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 80 dataSize: 280 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + Statistics: + numRows: 80 dataSize: 280 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was 
here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) +explain extended select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. 
numRows = min(320/2, 6*7) +explain extended select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 320 dataSize: 31840 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 320 dataSize: 31840 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: 
default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 42 dataSize: 7350 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 42 dataSize: 3780 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 42 dataSize: 3780 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out new file mode 100644 index 0000000..f705f31 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -0,0 +1,1813 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists dept_staging ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_staging ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_staging +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: 
create table if not exists dept_orc like dept_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_orc like dept_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_orc +PREHOOK: query: alter table dept_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@dept_orc +PREHOOK: Output: default@dept_orc +POSTHOOK: query: alter table dept_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@dept_orc +POSTHOOK: Output: default@dept_orc +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@dept_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dept_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: insert overwrite table dept_orc select * from dept_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_staging +PREHOOK: Output: default@dept_orc +POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_staging +POSTHOOK: Output: default@dept_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging 
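The GROUP BY plans in annotate_stats_groupby.q.out above follow a simple arithmetic: the map-side Group By Operator's numRows grows linearly with the number of grouping sets (8 input rows become 8, 16, 24, and 32 for one to four sets), while the reduce side is capped at min(mapRows/2, product of the grouping-key NDVs), exactly as the comments state (min(80/2, 2) = 2 and min(320/2, 6*7) = 42). The sketch below reproduces that arithmetic. It is illustrative only: the class and method names are hypothetical, and the map-side parallelism factor is an assumption (the last two plans are consistent with a value of 10, the explicit grouping-sets plans with a value of 1; the diff does not state the setting used for each query).

```java
// Hypothetical sketch of the GROUP BY cardinality arithmetic the annotations
// above appear to follow; names are illustrative, not Hive's actual API.
public final class GroupByRowEstimator {

  /**
   * Map-side Group By: every input row is emitted once per grouping set,
   * and (assumption) the result is scaled by the number of mappers doing
   * independent partial aggregation, since each mapper's hash table may
   * emit its own copy of a group.
   */
  public static long mapSideRows(long inputRows, int numGroupingSets,
      int mapParallelism) {
    return inputRows * numGroupingSets * mapParallelism;
  }

  /**
   * Reduce-side Group By: bounded both by halving the map-side output and
   * by the product of the distinct-value counts of the grouping keys.
   */
  public static long reduceSideRows(long mapSideRows, long[] keyNdvs) {
    long ndvProduct = 1;
    for (long ndv : keyNdvs) {
      ndvProduct *= ndv;
    }
    return Math.min(mapSideRows / 2, ndvProduct);
  }

  public static void main(String[] args) {
    // "group by year": 8 rows, one grouping set, assumed parallelism 10
    // -> 80 map-side rows, then min(80 / 2, ndv(year) = 2) = 2.
    long mapRows = mapSideRows(8, 1, 10);
    System.out.println(mapRows + " -> "
        + reduceSideRows(mapRows, new long[] { 2 }));

    // "with cube" on (state, locid): four grouping sets -> 320 map-side
    // rows, then min(320 / 2, 6 * 7) = 42.
    long mapRowsCube = mapSideRows(8, 4, 10);
    System.out.println(mapRowsCube + " -> "
        + reduceSideRows(mapRowsCube, new long[] { 6, 7 }));
  }
}
```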
+PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] 
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- number of rows +-- emp_orc - 6 +-- dept_orc - 4 +-- loc_orc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than the number of rows) +-- emp_orc.deptid - 3 +-- emp_orc.lastname - 7 +-- dept_orc.deptid - 6 +-- dept_orc.deptname - 5 +-- loc_orc.locid - 6 +-- loc_orc.state - 7 + +-- Expected output rows: 4 +-- Reason: #rows = (6*4)/max(3,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- number of rows +-- emp_orc - 6 +-- dept_orc - 4 +-- loc_orc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than the number of rows) +-- emp_orc.deptid - 3 +-- emp_orc.lastname - 7 +-- dept_orc.deptid - 6 +-- dept_orc.deptname - 5 +-- loc_orc.locid - 6 +-- loc_orc.state - 7 + +-- Expected output rows: 4 +-- Reason: #rows = (6*4)/max(3,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
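The join annotations that follow all apply the rule stated in the comment above: the estimated row count is the product of the input row counts divided by, for each equi-join key, the larger of the two sides' distinct-value counts, e.g. (6*4)/max(3,6) = 4 for the two-way emp/dept join. A minimal sketch of that formula, with hypothetical class and method names (this is not Hive's actual implementation):

```java
// Illustrative sketch of the join cardinality rule used in the comments
// above; names are hypothetical, this is not Hive's actual implementation.
public final class JoinRowEstimator {

  /**
   * numRows(T1 join ... join Tn) =
   *   (|T1| * ... * |Tn|) / PRODUCT over join keys of max(leftNdv, rightNdv)
   */
  public static long estimateRows(long[] tableRows, long[][] joinKeyNdvs) {
    long numerator = 1;
    for (long rows : tableRows) {
      numerator *= rows;
    }
    long denominator = 1;
    for (long[] ndvPair : joinKeyNdvs) {
      denominator *= Math.max(ndvPair[0], ndvPair[1]);
    }
    return numerator / denominator;
  }

  public static void main(String[] args) {
    // emp_orc (6 rows, ndv(deptid) = 3) join dept_orc (4 rows, ndv(deptid) = 6):
    // (6 * 4) / max(3, 6) = 4, matching the Join Operator's numRows below.
    System.out.println(estimateRows(new long[] { 6, 4 },
        new long[][] { { 3, 6 } }));

    // Three-way join with emp_orc e1: (6 * 4 * 6) / (max(3, 6) * max(6, 3)) = 4.
    System.out.println(estimateRows(new long[] { 6, 4, 6 },
        new long[][] { { 3, 6 }, { 6, 3 } }));
  }
}
```

Note that integer division truncates, which is why the mixed-type join later in this file expects 4 rows from 192/(6*7).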
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 
deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- 3 way join +-- Expected output rows: 4 +-- Reason: #rows = (6*4*6)/(max(3,6)*max(6,3)) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 way join +-- Expected output rows: 4 +-- Reason: #rows = (6*4*6)/(max(3,6)*max(6,3)) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME emp_orc) e1) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL e1) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + e1 + TableScan + alias: e1 + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e1, e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: + numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: + numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:int:int:string:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- Expected output rows: 5 +-- Reason: #rows = (6*4*8)/(max(3,6)*max(6,6)) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) +PREHOOK: type: QUERY +POSTHOOK: query: -- Expected output rows: 5 +-- Reason: #rows = 
(6*4*8)/(max(3,6)*max(6,6)) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + l + TableScan + alias: l + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + sort order: + + Map-reduce partition columns: + expr: locid + type: int + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + 
field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct 
loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + /loc_orc [l] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 + Statistics: + numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: int + expr: _col10 + type: bigint + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:int:int:string:string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- join keys of different types +-- Expected output rows: 4 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) +PREHOOK: type: QUERY +POSTHOOK: query: -- join keys of different types +-- Expected output rows: 4 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) state)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: UDFToDouble(deptid) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(deptid) + type: double + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: UDFToDouble(deptid) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(deptid) + type: double + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + l + TableScan + alias: l + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: UDFToDouble(state) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(state) + type: double + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + /loc_orc [l] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 + Statistics: + numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: 
string + expr: _col8 + type: string + expr: _col9 + type: int + expr: _col10 + type: bigint + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:int:int:string:string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4)/max(3,6)*max(7,5) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) +PREHOOK: type: QUERY +POSTHOOK: query: -- multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4)/max(3,6)*max(7,5) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. 
(TOK_TABLE_OR_COL d) deptname))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: deptname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: deptname + type: string + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: lastname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: lastname + type: string + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- 3 way and multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 way and multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year 
SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL d) deptname)))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL l) state))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: deptname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: deptname + type: string + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: lastname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: lastname + type: string + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + l + TableScan + alias: l + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + expr: state + type: string + sort order: ++ + Map-reduce partition columns: + expr: locid + type: int + expr: state + type: string + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types 
int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + /loc_orc [l] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} 
{VALUE._col2} {VALUE._col3} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: int + expr: _col10 + type: bigint + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:int:int:string:string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out new file mode 100644 index 0000000..c6a446c --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -0,0 +1,237 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: 
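
The Expected-rows comments in the join tests above all instantiate the textbook join-cardinality estimate, with the division applying to the whole product of max-NDV terms: numRows = (R1 * R2 * ...) / (max(ndv_a) * max(ndv_b) * ...), truncated to an integer. That is how (6*4*8)/(max(3,6)*max(6,6)) becomes the numRows: 5 annotated on the first Join Operator, and why the multi-attribute joins, whose denominators gain one max-NDV factor per join key, round down to 0. (For the mixed-type join, the key expressions are first normalized with UDFToDouble, as shown in that plan, so the NDVs are compared over a common type.) A minimal arithmetic sketch of the estimate follows; the names are hypothetical and this is not the patch's actual stats-annotation code:

    // JoinCardinalityEstimate.java -- illustrative only; mirrors the
    // Expected-rows comments, not Hive's real stats-annotation classes.
    public class JoinCardinalityEstimate {

      // rowCounts: rows per joined relation; ndvsPerKey: for each join
      // attribute, the distinct-value counts of the columns it equates.
      static long estimate(long[] rowCounts, long[][] ndvsPerKey) {
        double rows = 1.0;
        for (long rc : rowCounts) {
          rows *= rc;                        // numerator: 6 * 4 * 8
        }
        for (long[] ndvs : ndvsPerKey) {
          long max = 1;
          for (long ndv : ndvs) {
            max = Math.max(max, ndv);        // one denominator factor per key
          }
          rows /= max;
        }
        return (long) rows;                  // truncate: 5.33 -> 5, 0.57 -> 0
      }

      public static void main(String[] args) {
        // (6*4*8)/(max(3,6)*max(6,6)) = 5 -- first three-way join
        System.out.println(estimate(new long[]{6, 4, 8}, new long[][]{{3, 6}, {6, 6}}));
        // (6*4)/(max(3,6)*max(7,5)) = 0 -- multi-attribute join
        System.out.println(estimate(new long[]{6, 4}, new long[][]{{3, 6}, {7, 5}}));
      }
    }
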
Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 4))) + +STAGE DEPENDENCIES: + 
Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +PREHOOK: type: QUERY +POSTHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 16))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 16 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 0))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + 
expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out new file mode 100644 index 0000000..914a114 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -0,0 +1,1764 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, 
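
The Limit plans in annotate_stats_limit.q.out above follow a simple scaling rule: numRows is capped at the limit, and dataSize is rescaled by the floored average row size, so 8 rows / 796 bytes under limit 4 becomes 4 * (796/8 = 99) = 396 bytes, a limit larger than the input (16) leaves the statistics untouched, and limit 0 zeroes both. A small sketch of that rule under those assumptions; the helper is hypothetical, not the patch's actual operator code:

    // LimitStatsRule.java -- illustrative sketch of the Limit annotation
    // seen in annotate_stats_limit.q.out; names are hypothetical.
    public class LimitStatsRule {

      // Returns {numRows, dataSize} after applying LIMIT n.
      static long[] applyLimit(long numRows, long dataSize, long limit) {
        if (limit >= numRows) {
          return new long[]{numRows, dataSize};       // limit 16 over 8 rows: unchanged
        }
        long avgRowSize = dataSize / numRows;         // 796 / 8 = 99 (floored)
        return new long[]{limit, limit * avgRowSize}; // limit 4 -> 396, limit 0 -> 0
      }

      public static void main(String[] args) {
        long[] s = applyLimit(8, 796, 4);
        System.out.println(s[0] + " rows, " + s[1] + " bytes"); // 4 rows, 396 bytes
      }
    }
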
comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level analyze statistics for specific partition +analyze table loc_orc partition(year=2001) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: query: -- partition level analyze statistics for specific partition +analyze table loc_orc partition(year=2001) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc where
year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 325 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 325 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + 
expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + 
+STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + 
COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc 
+ name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and (year = '__HIVE_DEFAULT_PARTITION__')) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, 
comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select zip from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select zip from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: zip + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + 
bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 720 
basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
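
The plans above show how the annotator combines per-partition statistics when only one of the two partitions (year=2001) has analyzed column statistics: basic stats merge to COMPLETE (7 + 1 = 8 rows; 402 + 325 = 727 bytes, falling back to each partition's totalSize where rawDataSize is 0), while column stats merge to PARTIAL because the __HIVE_DEFAULT_PARTITION__ partition contributes none. The following is a minimal, self-contained sketch of that merge rule, not the patch's actual implementation; the State enum and PartitionStats class are simplified stand-ins for the real org.apache.hadoop.hive.ql.plan.Statistics API.

import java.util.Arrays;
import java.util.List;

public class StatsStateMergeSketch {
  enum State { NONE, PARTIAL, COMPLETE }   // stand-in for the Statistics state

  static class PartitionStats {
    final long numRows;       // -1 when basic stats were never gathered
    final long rawDataSize;   // 0 for these ORC partitions; fall back to totalSize
    final long totalSize;     // bytes on disk
    final boolean hasColStats;
    PartitionStats(long numRows, long rawDataSize, long totalSize, boolean hasColStats) {
      this.numRows = numRows; this.rawDataSize = rawDataSize;
      this.totalSize = totalSize; this.hasColStats = hasColStats;
    }
  }

  // COMPLETE only if every partition contributes; PARTIAL if at least one does.
  static State merge(boolean allPresent, boolean anyPresent) {
    if (allPresent) return State.COMPLETE;
    return anyPresent ? State.PARTIAL : State.NONE;
  }

  public static void main(String[] args) {
    // year=2001: 7 rows, totalSize 402, column stats analyzed.
    // __HIVE_DEFAULT_PARTITION__: 1 row, totalSize 325, no column stats.
    List<PartitionStats> parts = Arrays.asList(
        new PartitionStats(7, 0, 402, true),
        new PartitionStats(1, 0, 325, false));

    long rows = 0, size = 0;
    boolean allBasic = true, anyBasic = false, allCol = true, anyCol = false;
    for (PartitionStats p : parts) {
      if (p.numRows >= 0) { rows += p.numRows; anyBasic = true; } else { allBasic = false; }
      size += (p.rawDataSize > 0) ? p.rawDataSize : p.totalSize;
      if (p.hasColStats) { anyCol = true; } else { allCol = false; }
    }
    // Prints: numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL
    System.out.println("numRows: " + rows + " dataSize: " + size
        + " basicStatsState: " + merge(allBasic, anyBasic)
        + " colStatsState: " + merge(allCol, anyCol));
  }
}

With the pre-analyze numbers from earlier in this file (numRows -1 and rawDataSize -1 for the default partition), the same loop yields numRows: 7 dataSize: 727 and basicStatsState PARTIAL, matching the first plan above.
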
+ +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (!= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (year <> 2001) + type: boolean + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + 
numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out new file mode 100644 index 0000000..4f11d7f --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -0,0 +1,4400 @@ +PREHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 
timestamp, + da1 timestamp, + s1 string, + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table alltypes_orc like alltypes +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alltypes_orc like alltypes +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes +PREHOOK: type: LOAD +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +POSTHOOK: Output: default@alltypes +PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes +POSTHOOK: Output: default@alltypes_orc +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, 
comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select 
Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, 
comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) int1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 
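(For context on the Statistics lines in these plans: the annotated numRows: 2 and dataSize: 1514 at each TableScan mirror the numRows and rawDataSize entries printed in the Partition properties below. A minimal sketch of that lookup, assuming a plain java.util.Properties view of the table parameters; BasicStatsLookup and basicStats are hypothetical names for illustration, not Hive's StatsUtils API.)

```java
import java.util.Properties;

// Hypothetical helper (not Hive's API): basic table statistics are read
// straight from the table/partition parameters, i.e. the numRows and
// rawDataSize entries visible in the Partition properties of these plans.
public class BasicStatsLookup {
  public static long[] basicStats(Properties tableParams) {
    long numRows = Long.parseLong(tableParams.getProperty("numRows", "-1"));
    long rawDataSize = Long.parseLong(tableParams.getProperty("rawDataSize", "-1"));
    // Assumption: basicStatsState is COMPLETE only when both values are
    // available; otherwise the annotation degrades to PARTIAL or NONE.
    return new long[] {numRows, rawDataSize};
  }

  public static void main(String[] args) {
    Properties p = new Properties();
    p.setProperty("numRows", "2");        // matches numRows 2 in the plan
    p.setProperty("rawDataSize", "1514"); // matches rawDataSize 1514 in the plan
    long[] stats = basicStats(p);
    System.out.println(stats[0] + " rows, " + stats[1] + " bytes"); // 2 rows, 1514 bytes
  }
}
```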
+ properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 174 +explain extended select s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 174 +explain extended select s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: s1 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: m1 + type: map + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) 
de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 246 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 246 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1)) (TOK_SELEXPR (TOK_TABLE_OR_COL ti1)) (TOK_SELEXPR (TOK_TABLE_OR_COL si1)) 
(TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR (TOK_TABLE_OR_COL bi1)) (TOK_SELEXPR (TOK_TABLE_OR_COL f1)) (TOK_SELEXPR (TOK_TABLE_OR_COL d1)) (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: s1 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types boolean:tinyint:smallint:int:bigint:float:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + 
serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_NULL)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: null + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + 
serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11L)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + 
hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11.0)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11.0 + type: double + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 'hello' + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: 
COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] 
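(The numRows/rawDataSize values asserted in the comments of these queries are consistent with a simple size model: a fixed per-row width for each primitive column plus an average size from column statistics for strings. A minimal sketch of that model, with widths inferred from the dataSize values in this file; SelectSizeModel is a hypothetical illustration, not Hive's StatsUtils implementation.)

```java
import java.util.List;
import java.util.Map;

// Hypothetical illustration of the size model these plans follow:
// dataSize = numRows * sum(per-column sizes).
public class SelectSizeModel {
  // Per-row byte widths inferred from the dataSize values above
  // (e.g. select bo1 -> 8 bytes for 2 rows, select 11L -> 16 for 2 rows).
  static final Map<String, Integer> FIXED = Map.of(
      "boolean", 4, "tinyint", 4, "smallint", 4, "int", 4,
      "bigint", 8, "float", 4, "double", 8);

  static long dataSize(long numRows, List<String> types, Map<String, Integer> avgVarLen) {
    long perRow = 0;
    for (String t : types) {
      // Fixed width for primitives; average size from column stats otherwise.
      perRow += FIXED.getOrDefault(t, avgVarLen.getOrDefault(t, 0));
    }
    return numRows * perRow;
  }

  public static void main(String[] args) {
    // select bo1 -> 2 * 4 = 8, matching "numRows: 2 dataSize: 8" above.
    System.out.println(dataSize(2, List.of("boolean"), Map.of()));
    // select bo1, ti1, si1, i1, bi1, f1, d1, s1 with s1 averaging 87 bytes
    // -> 2 * (4+4+4+4+8+4+8+87) = 246, matching that plan's dataSize.
    System.out.println(dataSize(2,
        List.of("boolean", "tinyint", "smallint", "int",
                "bigint", "float", "double", "string"),
        Map.of("string", 87)));
  }
}
```

(The 87-byte average for s1 is the same figure that select s1 alone yields above: 174 bytes / 2 rows.)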
+POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: unbase64('0xe23') + type: binary + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 96 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 96 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types binary + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + 
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TINYINT "1")) (TOK_SELEXPR (TOK_FUNCTION TOK_SMALLINT "20"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE 
colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: UDFToByte('1') + type: tinyint + expr: UDFToShort('20') + type: smallint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types tinyint:smallint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: 
alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS TIMESTAMP) + type: timestamp + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types timestamp + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] 
+POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DATE "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS DATE) + type: date + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types date + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DECIMAL "58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '58.174' AS decimal(10,0)) + type: decimal(10,0) + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 224 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 224 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types decimal(10,0) + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 
SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION array 1 2 3))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types array + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + 
numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map "a=1 b=2 c=3" " " "="))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: str_to_map('a=1 b=2 c=3',' ','=') + type: map + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NAMED_STRUCT "a" 11 "b" 11))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: named_struct('a',11,'b',11) + type: struct + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CREATE_UNION 0 "hello"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 
create_union(0,'hello') + type: uniontype + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types uniontype + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(*) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(*) is projected as new column. 
It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(*) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct 
alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(1) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(1) is projected as new column. 
It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(1) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc 
{ bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for complex column types will be missing. data size will be calculated from available column statistics +-- numRows: 2 rawDataSize: 254 +explain extended select *,11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for complex column types will be missing. 
data size will be calculated from available column statistics +-- numRows: 2 rawDataSize: 254 +explain extended select *,11 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + expr: 11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: + numRows: 2 dataSize: 254 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 254 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14 + columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1 from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1 from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE 
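[Annotation] In the select *,11 plan above, colStatsState drops from COMPLETE to PARTIAL because the map, array and struct columns carry no column statistics, and the 254-byte dataSize is rebuilt from only the columns that do have them. A hedged sketch of the state merge, assuming a simple all/none/some rule (the enum and method are illustrative):

```java
import java.util.List;

public class ColStatsStateSketch {
  enum State { NONE, PARTIAL, COMPLETE }

  /** Overall column-stats state: COMPLETE only if every projected column has stats. */
  static State merge(List<Boolean> hasStatsPerColumn) {
    boolean all = hasStatsPerColumn.stream().allMatch(b -> b);
    boolean none = hasStatsPerColumn.stream().noneMatch(b -> b);
    return all ? State.COMPLETE : (none ? State.NONE : State.PARTIAL);
  }

  public static void main(String[] args) {
    // 11 primitive columns with stats; the 3 complex ones (m1, l1, st1) without.
    List<Boolean> cols = List.of(true, true, true, true, true, true, true, true,
                                 true, true, true, false, false, false);
    System.out.println(merge(cols)); // PARTIAL
  }
}
```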
[(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns 
bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE 
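[Annotation] In the subquery plan above, the Limit operator keeps numRows at 2 even though the query asks for LIMIT 10: a limit can only cap the row estimate, never raise it above what the child operator produces. The rule is essentially a min, as this sketch shows:

```java
public class LimitStatsSketch {
  /** A LIMIT never increases the row estimate: it can only cap it. */
  static long applyLimit(long inputRows, long limit) {
    return Math.min(inputRows, limit);
  }

  public static void main(String[] args) {
    System.out.println(applyLimit(2L, 10L)); // 2, matching the plan above
  }
}
```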
[(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 11 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct 
st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, 
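[Annotation] The inner select of i1,11 above is sized at 16 bytes: two rows times two 4-byte int columns, with the constant 11 costed exactly like a real column; the outer select drops one int and lands at 8. The same arithmetic reproduces the 24-byte estimate of the i1,11.0 plan that follows (int + double per row). A sketch with assumed fixed widths; the real optimizer derives these from its Java object model, so treat the numbers as illustrative:

```java
import java.util.Map;

public class RawSizeSketch {
  // Assumed fixed widths for primitive types (illustrative values).
  private static final Map<String, Long> WIDTH =
      Map.of("boolean", 1L, "tinyint", 1L, "smallint", 2L, "int", 4L,
             "bigint", 8L, "float", 4L, "double", 8L);

  static long dataSize(long numRows, String... columnTypes) {
    long rowWidth = 0;
    for (String t : columnTypes) {
      rowWidth += WIDTH.getOrDefault(t, 0L);
    }
    return numRows * rowWidth;
  }

  public static void main(String[] args) {
    System.out.println(dataSize(2, "int", "int"));    // 16, for select i1, 11
    System.out.println(dataSize(2, "int", "double")); // 24, for select i1, 11.0
  }
}
```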
type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 11 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + 
Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + expr: 'hello' + type: string + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11.0 x)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))))) + +STAGE DEPENDENCIES: + Stage-1 
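[Annotation] The 186-byte estimate above shows how variable-length types are priced: the constant 'hello' costs 89 bytes per row (186 = 2 rows x (4-byte int + 89-byte string)), i.e. an average character length plus a per-object JVM overhead. In the sketch below the 84-byte overhead is reverse-engineered so the arithmetic matches the plan; it is not a constant taken from the Hive source.

```java
public class VarLenSizeSketch {
  /**
   * Variable-length types (string, binary) are costed from an average length
   * plus a per-object JVM overhead. The overhead is an assumed value here.
   */
  static long stringColumnSize(long numRows, double avgLen, long assumedOverhead) {
    return (long) (numRows * (assumedOverhead + avgLen));
  }

  public static void main(String[] args) {
    // 2 rows x (84 + 5) = 178 bytes for the string column; the int column
    // adds 2 x 4 = 8 more, giving the plan's 186-byte total.
    System.out.println(stringColumnSize(2, 5.0, 84));
  }
}
```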
is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 11.0 + type: double + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: double + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col1 + type: double + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE 
colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) x) (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23") ub)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: unbase64('0xe23') + type: binary + 
outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: binary + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + expr: 'hello' + type: string + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true 
+ serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR "hello" hell)) (TOK_LIMIT 10))) in1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL hell) h)) (TOK_LIMIT 10))) in2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL h)) (TOK_SELEXPR 11.0)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + in2:in1:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 'hello' + type: string + outputColumnNames: _col0, 
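[Annotation] The unbase64 plan above exercises the other fallback path: the function returns binary, and no column statistic can tell the estimator how long the result is, so it falls back to a configured ceiling, the hive.stats.max.variable.length knob this patch adds (default 100). Note the plan's 104-byte figure for two rows also folds in assumed object overheads, so it is not simply 2 x 100. A sketch of the fallback, with an illustrative method name:

```java
public class UnknownLenSketch {
  // Default of hive.stats.max.variable.length in this patch; used when the
  // length of a variable-length value cannot be determined from statistics.
  private static final int MAX_VARIABLE_LENGTH_DEFAULT = 100;

  static long estimateVariableLength(Double avgLenFromStats, int configuredMax) {
    // Prefer real column statistics; otherwise use the configured ceiling.
    return avgLenFromStats != null ? Math.round(avgLenFromStats) : configuredMax;
  }

  public static void main(String[] args) {
    System.out.println(estimateVariableLength(null, MAX_VARIABLE_LENGTH_DEFAULT)); // 100
    System.out.println(estimateVariableLength(5.0, MAX_VARIABLE_LENGTH_DEFAULT));  // 5
  }
}
```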
_col1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [in2:in1:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + GatherStats: false + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: string + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: 11.0 + type: double + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 194 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 194 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- This test is for FILTER operator where filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc where bo1 +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is for FILTER operator where filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc where bo1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))) (TOK_WHERE (TOK_TABLE_OR_COL bo1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: bo1 + type: boolean + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, 
timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select bo1 from alltypes_orc where !bo1 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select bo1 from alltypes_orc where !bo1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))) (TOK_WHERE (! 
(TOK_TABLE_OR_COL bo1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not bo1) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1409 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + 
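[Annotation] The two filter plans above show that with COMPLETE column statistics a predicate on a bare boolean column can be evaluated exactly rather than by a generic selectivity guess: bo1 is true in both rows of alltypes_orc, so `where bo1` keeps numRows at 2 and `where !bo1` goes to 0. A sketch of that special case, assuming true/false counts are available from the column statistics:

```java
public class BooleanFilterSketch {
  /**
   * For a filter on a boolean column, the true/false counts from the column
   * statistics are themselves the row estimates.
   */
  static long filterRows(long numTrues, long numFalses, boolean negated) {
    return negated ? numFalses : numTrues;
  }

  public static void main(String[] args) {
    System.out.println(filterRows(2, 0, false)); // where bo1  -> 2
    System.out.println(filterRows(2, 0, true));  // where !bo1 -> 0
  }
}
```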
Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out new file mode 100644 index 0000000..64e45c7 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -0,0 +1,698 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 0 dataSize: 349 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 349 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: 
COMPLETE colStatState: PARTIAL +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: deptid + type: int + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | 
+#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY 
(TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL lastname))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 546 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 546 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: deptid + type: int + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname,deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname,deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL lastname)) (TOK_SELEXPR 
(TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 566 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 566 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out new file mode 100644 index 0000000..03235e6 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -0,0 +1,1127 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 688 +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 688 +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map 
Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY 
(TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, 
type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: create database test +PREHOOK: type: CREATEDATABASE +POSTHOOK: query: create database test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: use test +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: use test +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if 
not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: test@loc_orc +PREHOOK: Output: test@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: test@loc_orc +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: test@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +PREHOOK: Output: test@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_staging compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +PREHOOK: Output: test@loc_staging +POSTHOOK: query: analyze table loc_staging compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: 
Input: test@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:temp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + 
                      numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+        null-subquery2:temp-subquery2:loc_orc
+          TableScan
+            alias: loc_orc
+            Statistics:
+                numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: loc_orc
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name default.loc_orc
+              numFiles 1
+              numRows 8
+              rawDataSize 796
+              serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 489
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns state,locid,zip,year
+                columns.types string:int:bigint:int
+                field.delim |
+#### A masked pattern was here ####
+                name default.loc_orc
+                numFiles 1
+                numRows 8
+                rawDataSize 796
+                serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+                serialization.format |
+                serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                totalSize 489
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: default.loc_orc
+            name: default.loc_orc
+#### A masked pattern was here ####
+          Partition
+            base file name: loc_orc
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name test.loc_orc
+              numFiles 1
+              numRows 8
+              rawDataSize 796
+              serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 489
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns state,locid,zip,year
+                columns.types string:int:bigint:int
+                field.delim |
+#### A masked pattern was here ####
+                name test.loc_orc
+                numFiles 1
+                numRows 8
+                rawDataSize 796
+                serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+                serialization.format |
+                serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                totalSize 489
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: test.loc_orc
+            name: test.loc_orc
+      Truncated Path -> Alias:
+        /loc_orc [null-subquery1:temp-subquery1:loc_orc]
+        /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- numRows: 16 rawDataSize: 1376
+explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp
+PREHOOK: type: QUERY
+POSTHOOK: query: -- numRows: 16 rawDataSize: 1376
+explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_staging))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:temp-subquery1:loc_staging
+          TableScan
+            alias: loc_staging
+            Statistics:
+                numRows: 8 dataSize: 117 basicStatsState: COMPLETE colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+        null-subquery2:temp-subquery2:loc_orc
+          TableScan
+            alias: loc_orc
+            Statistics:
+                numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: loc_orc
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name test.loc_orc
+              numFiles 1
+              numRows 8
+              rawDataSize 796
+              serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 489
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+              input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+              output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns state,locid,zip,year
+                columns.types string:int:bigint:int
+                field.delim |
+#### A masked pattern was here ####
+                name test.loc_orc
+                numFiles 1
+                numRows 8
+                rawDataSize 796
+                serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+                serialization.format |
+                serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+                totalSize 489
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              name: test.loc_orc
+            name: test.loc_orc
+#### A masked pattern was here ####
+          Partition
+            base file name: loc_staging
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE true
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name test.loc_staging
+              numFiles 1
+              numRows 8
+              rawDataSize 117
+              serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 125
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns state,locid,zip,year
+                columns.types string:int:bigint:int
+                field.delim |
+#### A masked pattern was here ####
+                name test.loc_staging
+                numFiles 1
+                numRows 8
+                rawDataSize 117
+                serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year}
+                serialization.format |
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 125
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: test.loc_staging
+            name: test.loc_staging
+      Truncated Path -> Alias:
+        /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc]
+        /test.db/loc_staging [null-subquery1:temp-subquery1:loc_staging]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
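
The Union statistics in the golden outputs above follow a simple additive rule: each branch's Select reports numRows: 8 dataSize: 688 (the size is recomputed from the projected `state` column's statistics, which is why the loc_staging scan's rawDataSize of 117 does not carry through), and the Union reports the sums, numRows: 16 dataSize: 1376. The Java sketch below illustrates just that rule under stated assumptions: the BasicStats class and mergeForUnion helper are hypothetical stand-ins for illustration, not the org.apache.hadoop.hive.ql.plan.Statistics API this patch adds.

    // Minimal, self-contained sketch of the additive Union rule visible above.
    // NOTE: BasicStats and mergeForUnion are hypothetical illustrations; they
    // are not the org.apache.hadoop.hive.ql.plan.Statistics API from this patch.
    import java.util.Arrays;
    import java.util.List;

    public class UnionStatsSketch {

      /** Stand-in for an operator's basic statistics (row count, size in bytes). */
      static class BasicStats {
        final long numRows;
        final long dataSize;

        BasicStats(long numRows, long dataSize) {
          this.numRows = numRows;
          this.dataSize = dataSize;
        }

        @Override
        public String toString() {
          return "numRows: " + numRows + " dataSize: " + dataSize;
        }
      }

      /** UNION ALL concatenates its inputs, so rows and bytes simply add up. */
      static BasicStats mergeForUnion(List<BasicStats> parents) {
        long rows = 0;
        long size = 0;
        for (BasicStats p : parents) {
          rows += p.numRows;
          size += p.dataSize;
        }
        return new BasicStats(rows, size);
      }

      public static void main(String[] args) {
        // The two Select branches feeding the Union above: each projects
        // `state` from an 8-row table, estimated at 688 bytes from column stats.
        BasicStats left = new BasicStats(8, 688);
        BasicStats right = new BasicStats(8, 688);
        // Prints "numRows: 16 dataSize: 1376", matching the annotated Union.
        System.out.println(mergeForUnion(Arrays.asList(left, right)));
      }
    }

Because the rule only adds fully-known inputs, a Union over COMPLETE branches stays COMPLETE, which matches the basicStatsState/colStatsState flags carried through the plans above.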