diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 32ab3d8..2d74387 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -635,6 +635,14 @@
 HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0),
 HIVE_STATS_KEY_PREFIX_MAX_LENGTH("hive.stats.key.prefix.max.length", 150),
 HIVE_STATS_KEY_PREFIX("hive.stats.key.prefix", ""), // internal usage only
+ // if the length of a variable-length data type cannot be determined, this length will be used
+ HIVE_STATS_MAX_VARIABLE_LENGTH("hive.stats.max.variable.length", 100),
+ // if the number of elements in a list cannot be determined, this value will be used
+ HIVE_STATS_LIST_NUM_ENTRIES("hive.stats.list.num.entries", 10),
+ // if the number of elements in a map cannot be determined, this value will be used
+ HIVE_STATS_MAP_NUM_ENTRIES("hive.stats.map.num.entries", 10),
+ // to accurately compute statistics for GROUP BY, map-side parallelism needs to be known
+ HIVE_STATS_MAP_SIDE_PARALLELISM("hive.stats.map.parallelism", 1),
 // Concurrency
 HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false),
diff --git data/files/alltypes.txt data/files/alltypes.txt
new file mode 100644
index 0000000..d86a7bc
--- /dev/null
+++ data/files/alltypes.txt
@@ -0,0 +1,2 @@
+true|10|100|1000|10000|4.0|20.0|2.2222|1969-12-31 15:59:58.174|1970-01-01 00:00:00|hello|k1:v1,k2:v2|100,200|{10, "foo"}
+true|20|200|2000|20000|8.0|40.0|4.2222|1970-12-31 15:59:58.174|1971-01-01 00:00:00||k3:v3,k4:v4|200,300|{20, "bar"}
diff --git data/files/dept.txt data/files/dept.txt
new file mode 100644
index 0000000..292bee6
--- /dev/null
+++ data/files/dept.txt
@@ -0,0 +1,4 @@
+31|sales
+33|engineering
+34|clerical
+35|marketing
diff --git data/files/emp.txt data/files/emp.txt
new file mode 100644
index 0000000..a0e76b9
--- /dev/null
+++ data/files/emp.txt
@@ -0,0 +1,6 @@
+Rafferty|31
+Jones|33
+Steinberg|33
+Robinson|34
+Smith|34
+John|
diff --git data/files/loc.txt data/files/loc.txt
new file mode 100644
index 0000000..69910b7
--- /dev/null
+++ data/files/loc.txt
@@ -0,0 +1,8 @@
+OH|31|43201|2001
+IO|32|43202|2001
+CA|35|43809|2001
+FL|33|54342|2001
+UT|35||2001
+CA|35|43809|2001
+|34|40000|
+FL|33|54342|2001
diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index bad4f48..27117cd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -408,6 +408,8 @@
 DROP_COMMAND_NOT_ALLOWED_FOR_PARTITION(30011, "Partition protected from being dropped"),
 COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not supported "
 + "for partition columns"),
+
+ STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"),
 ;
 private int errorCode;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index 3b9a653..98fad54 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -1594,4 +1595,17 @@ static boolean toString(StringBuilder builder, Set visited, Operator } return false; } + + public Statistics getStatistics() { + if (conf != null) { + return conf.getStatistics(); + } + return null; + } + + public void setStatistics(Statistics stats) { + if (conf != null) { + conf.setStatistics(stats); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 0703c86..f329ae8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner; import org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -114,6 +115,9 @@ public void initialize(HiveConf hiveConf) { if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) { transformations.add(new StatsOptimizer()); } + if (pctx.getContext().getExplain()) { + transformations.add(new AnnotateWithStatistics()); + } transformations.add(new SimpleFetchOptimizer()); // must be called last if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java new file mode 100644 index 0000000..181c12d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.plan.Statistics; + +public class AnnotateStatsProcCtx implements NodeProcessorCtx { + + private ParseContext pctx; + private HiveConf conf; + private Statistics andExprStats = null; + + public AnnotateStatsProcCtx(ParseContext pctx) { + this.setParseContext(pctx); + if(pctx != null) { + this.setConf(pctx.getConf()); + } else { + this.setConf(null); + } + } + + public HiveConf getConf() { + return conf; + } + + public void setConf(HiveConf conf) { + this.conf = conf; + } + + public ParseContext getParseContext() { + return pctx; + } + + public void setParseContext(ParseContext pctx) { + this.pctx = pctx; + } + + public Statistics getAndExprStats() { + return andExprStats; + } + + public void setAndExprStats(Statistics andExprStats) { + this.andExprStats = andExprStats; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java new file mode 100644 index 0000000..aac447a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.DemuxOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.PreOrderWalker; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.Transform; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class AnnotateWithStatistics implements Transform { + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + AnnotateStatsProcCtx aspCtx = new AnnotateStatsProcCtx(pctx); + + // create a walker which walks the tree in a DFS manner while maintaining the + // operator stack. The dispatcher generates the plan from the operator tree + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getTableScanRule()); + opRules.put(new RuleRegExp("SEL", SelectOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getSelectRule()); + opRules.put(new RuleRegExp("FIL", FilterOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getFilterRule()); + opRules.put(new RuleRegExp("GBY", GroupByOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getGroupByRule()); + opRules.put(new RuleRegExp("JOIN", CommonJoinOperator.getOperatorName() + "%|" + + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule()); + opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"), + StatsRulesProcFactory.getLimitRule()); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(StatsRulesProcFactory.getDefaultRule(), opRules, + aspCtx); + GraphWalker ogw = new PreOrderWalker(disp); + + // Create a list of topop nodes + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pctx; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java new file mode 100644 index 0000000..dbbee1d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -0,0 +1,1004 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.DemuxOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.LimitOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.apache.hadoop.hive.serde.serdeConstants; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +public class StatsRulesProcFactory { + + /** + * Collect basic statistics like number of rows, data size 
and column level + * statistics from the table. Also sets the state of the available statistics. + * Basic and column statistics can have one of the following states + * COMPLETE, PARTIAL, NONE. In case of partitioned table, the basic and column + * stats are aggregated together to table level statistics. + * + */ + public static class TableScanStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + TableScanOperator tsop = (TableScanOperator) nd; + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + PrunedPartitionList partList = null; + try { + partList = aspCtx.getParseContext().getPrunedPartitions(tsop.getName(), tsop); + } catch (HiveException e1) { + throw new SemanticException(e1); + } + Table table = aspCtx.getParseContext().getTopToTable().get(tsop); + + // gather statistics for the first time and the attach it to table scan operator + Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop); + try { + tsop.setStatistics(stats.clone()); + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + } + + /** + * SELECT operator doesn't change the number of rows emitted from the parent + * operator. It changes the size of each tuple emitted. In a typical case, + * where only subset of columns are selected the average row size will + * reduce as some of the columns will be pruned. In order to accurately + * compute the average row size, column level statistics is required. + * Column level statistics stores average size of values in column which + * can be used to more reliably estimate the reduction in size of each + * tuple. In the absence of column level statistics, size of columns will be + * based on data type. For primitive data types size from + * {@link org.apache.hadoop.hive.ql.util.JavaDataModel} will be + * used and for variable length data types worst case will be assumed. + * + *
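+ * <p>
+ * As an illustration (hypothetical numbers, not part of this patch): if the parent emits
+ * T(R) = 1000 rows and the SELECT keeps only an int column (avgColLen 4) and a string column
+ * whose column statistics report avgColLen 20, the estimated output data size becomes
+ * 1000 * (4 + 20) = 24000 bytes, while the row count stays at 1000.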

+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in
+ * "Database Systems: The Complete Book" by Garcia-Molina et al.

+ * + */ + public static class SelectStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + SelectOperator sop = (SelectOperator) nd; + Operator parent = sop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + + // SELECT (*) does not change the statistics. Just pass on the parent statistics + if (sop.getConf().isSelectStar()) { + try { + if (parentStats != null) { + sop.setStatistics(parentStats.clone()); + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + try { + if (satisfyPrecondition(parentStats)) { + Statistics stats = parentStats.clone(); + List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + sop.getColumnExprMap(), sop.getSchema()); + long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats); + stats.setColumnStats(colStats); + stats.setDataSize(dataSize); + sop.setStatistics(stats); + } else { + if (parentStats != null) { + sop.setStatistics(parentStats.clone()); + } + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + } + + /** + * FILTER operator does not change the average row size but it does change + * the number of rows emitted. The reduction in the number of rows emitted + * is dependent on the filter expression. + * + *
+ * Notations:
+ * <ul>
+ * <li>T(S) - Number of tuples in relation S</li>
+ * <li>V(S,A) - Number of distinct values of attribute A in relation S</li>
+ * </ul>
+ * <p>
+ * Rules:
+ * <ul>
+ * <li>Column equals a constant: T(S) = T(R) / V(R,A)</li>
+ * <li>Inequality conditions: T(S) = T(R) / 3</li>
+ * <li>Not equals comparison: simple formula T(S) = T(R);
+ * alternate formula T(S) = T(R) * (V(R,A) - 1) / V(R,A)</li>
+ * <li>NOT condition: T(S) = T(R) - T(S'), where T(S') is the number of tuples that satisfy
+ * the negated condition</li>
+ * <li>Multiple AND conditions: apply rules 1 to 3 cascadingly (order does not matter)</li>
+ * <li>Multiple OR conditions: the simple formula evaluates the conditions independently and
+ * sums the results, T(S) = m1 + m2; the alternate formula is
+ * T(S) = T(R) * (1 - (1 - m1/T(R)) * (1 - m2/T(R))),
+ * where m1 is the number of tuples that satisfy condition 1 and m2 is the number of tuples
+ * that satisfy condition 2</li>
+ * </ul>
+ * <p>
+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in
+ * "Database Systems: The Complete Book" by Garcia-Molina et al.
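+ * <p>
+ * As an illustration (hypothetical numbers, not part of this patch): for T(R) = 500 rows and
+ * a column A with V(R,A) = 50 distinct values, the predicate A = 10 is estimated to emit
+ * T(S) = 500 / 50 = 10 rows, A &lt; 10 is estimated at 500 / 3 = 166 rows, and
+ * NOT (A = 10) at 500 - 10 = 490 rows.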

+ * + */ + public static class FilterStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + FilterOperator fop = (FilterOperator) nd; + Operator parent = fop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + + try { + if (satisfyPrecondition(parentStats)) { + ExprNodeDesc pred = fop.getConf().getPredicate(); + + // evaluate filter expression and update statistics + long newNumRows = evaluateExpression(parentStats, pred, aspCtx); + Statistics st = parentStats.clone(); + updateStats(st, newNumRows); + fop.setStatistics(st); + } else { + if (parentStats != null) { + fop.setStatistics(parentStats.clone()); + } + } + + aspCtx.setAndExprStats(null); + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + private long evaluateExpression(Statistics stats, ExprNodeDesc pred, + AnnotateStatsProcCtx aspCtx) throws CloneNotSupportedException { + long newNumRows = 0; + Statistics andStats = null; + if (pred instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + GenericUDF udf = genFunc.getGenericUDF(); + + // for AND condition cascadingly update stats + if (udf instanceof GenericUDFOPAnd) { + andStats = stats.clone(); + aspCtx.setAndExprStats(andStats); + + // evaluate children + for (ExprNodeDesc child : genFunc.getChildren()) { + newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx); + updateStats(aspCtx.getAndExprStats(), newNumRows); + } + } else { + + // for OR condition independently compute and update stats + if (udf instanceof GenericUDFOPOr) { + for (ExprNodeDesc child : genFunc.getChildren()) { + newNumRows += evaluateChildExpr(stats, child, aspCtx); + } + } else if (udf instanceof GenericUDFOPNot) { + newNumRows = evaluateNotExpr(stats, pred, aspCtx); + } else if (udf instanceof GenericUDFOPNotNull) { + newNumRows = evaluateColEqualsNullExpr(stats, pred, aspCtx); + newNumRows = stats.getNumRows() - newNumRows; + } else if (udf instanceof GenericUDFOPNull) { + newNumRows = evaluateColEqualsNullExpr(stats, pred, aspCtx); + } else { + + // single predicate condition + newNumRows = evaluateChildExpr(stats, pred, aspCtx); + } + } + } else if (pred instanceof ExprNodeColumnDesc) { + + // can be boolean column in which case return true count + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred; + String colName = encd.getColumn(); + String tabAlias = encd.getTabAlias(); + String colType = encd.getTypeString(); + if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + return cs.getNumTrues(); + } else { + + // if not boolean column return half the number of rows + return stats.getNumRows() / 2; + } + } + + return newNumRows; + } + + private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx) + throws CloneNotSupportedException { + + long numRows = stats.getNumRows(); + + // if the evaluate yields true then pass all rows else pass 0 rows + if (pred instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + for (ExprNodeDesc leaf : genFunc.getChildren()) { + if (leaf instanceof ExprNodeGenericFuncDesc) { + + // GenericUDF + long 
newNumRows = 0; + for (ExprNodeDesc child : ((ExprNodeGenericFuncDesc) pred).getChildren()) { + newNumRows = evaluateChildExpr(stats, child, aspCtx); + } + return numRows - newNumRows; + } else if (leaf instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) leaf; + if (encd.getValue().equals(true)) { + return 0; + } else { + return numRows; + } + } else if (leaf instanceof ExprNodeColumnDesc) { + + // NOT on boolean columns is possible. in which case return false count. + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf; + String colName = encd.getColumn(); + String tabAlias = encd.getTabAlias(); + String colType = encd.getTypeString(); + if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + return cs.getNumFalses(); + } else { + + // if not boolean column return half the number of rows + return numRows / 2; + } + } + } + } + + // worst case + return numRows; + } + + private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred, + AnnotateStatsProcCtx aspCtx) { + + long numRows = stats.getNumRows(); + + // evaluate similar to "col = constant" expr + if (pred instanceof ExprNodeGenericFuncDesc) { + + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; + for (ExprNodeDesc leaf : genFunc.getChildren()) { + + if (leaf instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; + String colName = colDesc.getColumn(); + String tabAlias = colDesc.getTabAlias(); + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + if (cs != null) { + long dvs = cs.getCountDistint(); + // if NULLs exists, add 1 to distinct count + if (cs.getNumNulls() > 0) { + dvs += 1; + } + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } + } + } + + // worst case + return numRows; + } + + private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx) + throws CloneNotSupportedException { + + long numRows = stats.getNumRows(); + + if (child instanceof ExprNodeGenericFuncDesc) { + + ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) child; + GenericUDF udf = genFunc.getGenericUDF(); + + if (udf instanceof GenericUDFOPEqual || udf instanceof GenericUDFOPEqualNS) { + String colName = null; + String tabAlias = null; + boolean isConst = false; + + for (ExprNodeDesc leaf : genFunc.getChildren()) { + if (leaf instanceof ExprNodeConstantDesc) { + + // if the first argument is const then just set the flag and continue + if (colName == null) { + isConst = true; + continue; + } + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + if (cs != null) { + long dvs = cs.getCountDistint(); + // if NULLs exists, add 1 to distinct count + if (cs.getNumNulls() > 0) { + dvs += 1; + } + + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } else if (leaf instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; + colName = colDesc.getColumn(); + tabAlias = colDesc.getTabAlias(); + + // if const is first argument then evaluate the result + if (isConst) { + ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + if (cs != null) { + long dvs = cs.getCountDistint(); + // if NULLs exists, add 1 to distinct count + if (cs.getNumNulls() > 0) { + dvs += 1; + } + + if (dvs != 0) { + return numRows / dvs; + } else { + return numRows; + } + } + } + } + } + } else if (udf 
instanceof GenericUDFOPNotEqual) {
+ return numRows;
+ } else if (udf instanceof GenericUDFOPEqualOrGreaterThan ||
+ udf instanceof GenericUDFOPEqualOrLessThan ||
+ udf instanceof GenericUDFOPGreaterThan ||
+ udf instanceof GenericUDFOPLessThan) {
+ return numRows / 3;
+ } else {
+ return evaluateExpression(stats, genFunc, aspCtx);
+ }
+ }
+
+ // worst case
+ return numRows;
+ }
+
+ }
+
+ /**
+ * GROUPBY operator changes the number of rows. Based on the aggregation, the number of rows
+ * emitted by the GBY operator will be at least 1 and at most T(R) (the number of rows in
+ * relation R). A better estimate can be found if we have column statistics on the columns
+ * we are grouping on.
+ * <p>
+ * Suppose we are grouping by attributes A,B,C and statistics for columns A,B,C are available;
+ * then a better estimate is the smaller of V(R,[A,B,C]) (the product of the distinct
+ * cardinalities of A,B,C) and T(R)/2:
+ * <p>
+ * T(R) = min(T(R)/2, V(R,[A,B,C])) ---> [1]
+ * <p>
+ * In the presence of grouping sets, the map-side GBY emits more rows depending on the size
+ * of the grouping set (input rows * size of grouping set). These rows are then reduced by
+ * map-side hash aggregation, an optimization in Hive that reduces the number of rows shuffled
+ * between the map and reduce stages. Hash aggregation is disabled if the memory it uses
+ * exceeds 90% of the max memory available for it, so the number of rows emitted from the map
+ * side varies depending on whether hash aggregation stays enabled throughout execution. In
+ * the presence of grouping sets, the following rules are applied.
+ * <p>
+ * If hash aggregation is enabled, for the query SELECT * FROM table GROUP BY (A,B) WITH CUBE:
+ * <p>
+ * T(R) = min(T(R)/2, T(R, GBY(A,B)) + T(R, GBY(A)) + T(R, GBY(B)) + 1)
+ * <p>
+ * where GBY(A,B), GBY(A) and GBY(B) are applications of the GBY rule [1] above.
+ * <p>
+ * If hash aggregation is disabled, apply the GBY rule [1] and then multiply the result by the
+ * number of elements in the grouping set: T(R) = T(R) * length_of_grouping_set. Since we do
+ * not know at compile time whether hash aggregation will be enabled or disabled, we assume
+ * the worst case, i.e., hash aggregation is disabled.
+ * <p>
+ * NOTE: The number of rows from the map-side GBY operator depends on map-side parallelism,
+ * i.e., the number of mappers. The map-side parallelism is read from the Hive config
+ * "hive.stats.map.parallelism"; if it is not set, a default parallelism of 1 is assumed.
+ * <p>
+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in
+ * "Database Systems: The Complete Book" by Garcia-Molina et al.
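+ * <p>
+ * As an illustration (hypothetical numbers, not part of this patch): with T(R) = 1000 and
+ * grouping columns A,B where V(R,A) = 30 and V(R,B) = 10, the reduce-side rule [1] gives
+ * min(1000/2, 30 * 10) = 300 rows. On the map side, with a grouping set of size 4 (CUBE on
+ * A,B) and map-side parallelism 1, the worst-case estimate is 1000 * 4 = 4000 rows.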

+ * + */ + public static class GroupByStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + GroupByOperator gop = (GroupByOperator) nd; + Operator parent = gop.getParentOperators().get(0); + Statistics parentStats = parent.getStatistics(); + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + int mapSideParallelism = HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_STATS_MAP_SIDE_PARALLELISM); + + try { + if (satisfyPrecondition(parentStats)) { + Statistics stats = parentStats.clone(); + RowSchema rs = gop.getSchema(); + List aggDesc = gop.getConf().getAggregators(); + Map colExprMap = gop.getColumnExprMap(); + List colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + colExprMap, rs); + stats.setColumnStats(colStats); + long dvProd = 1; + long newNumRows = 0; + + // compute product of distinct values of grouping columns + for (ColStatistics cs : colStats) { + if (cs != null) { + long dv = cs.getCountDistint(); + if (cs.getNumNulls() > 0) { + dv += 1; + } + dvProd *= dv; + } + } + + // map side + if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) { + + // since we do not know if hash-aggregation will be enabled or disabled + // at runtime we will assume that map-side group by does not do any reduction. + // hence no group by rule will be applied + + // map-side grouping set present. if grouping set is present then + // multiply the number of rows by number of elements in grouping set + if (gop.getConf().isGroupingSetsPresent()) { + int multiplier = gop.getConf().getListGroupingSets().size(); + + // take into account the map-side parallelism as well, default is 1 + multiplier *= mapSideParallelism; + newNumRows = multiplier * stats.getNumRows(); + long dataSize = multiplier * stats.getDataSize(); + stats.setNumRows(newNumRows); + stats.setDataSize(dataSize); + for (ColStatistics cs : colStats) { + if (cs != null) { + long oldNumNulls = cs.getNumNulls(); + long newNumNulls = multiplier * oldNumNulls; + cs.setNumNulls(newNumNulls); + } + } + } else { + + // map side no grouping set + newNumRows = stats.getNumRows() * mapSideParallelism; + updateStats(stats, newNumRows); + } + } else { + + // reduce side + newNumRows = applyGBYRule(stats.getNumRows(), dvProd); + updateStats(stats, newNumRows); + } + + // if UDAFs are present, new columns needs to be added + if (!aggDesc.isEmpty()) { + List aggColStats = Lists.newArrayList(); + for (ColumnInfo ci : rs.getSignature()) { + + // if the columns in row schema is not contained in column + // expression map, then those are the aggregate columns that + // are added GBY operator. 
we will estimate the column statistics + // for those newly added columns + if (!colExprMap.containsKey(ci.getInternalName())) { + String colName = ci.getInternalName(); + colName = StatsUtils.stripPrefixFromColumnName(colName); + String tabAlias = ci.getTabAlias(); + String colType = ci.getTypeName(); + ColStatistics cs = new ColStatistics(tabAlias, colName, colType); + cs.setCountDistint(stats.getNumRows()); + cs.setNumNulls(0); + cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType)); + aggColStats.add(cs); + } + } + stats.addToColumnStats(aggColStats); + + // if UDAF present and if column expression map is empty then it must + // be full aggregation query like count(*) in which case number of rows will be 1 + if (colExprMap.isEmpty()) { + stats.setNumRows(1); + updateStats(stats, 1); + } + } + + gop.setStatistics(stats); + } else { + if (parentStats != null) { + gop.setStatistics(parentStats.clone()); + } + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } + + private long applyGBYRule(long numRows, long dvProd) { + long newNumRows = numRows; + + // to avoid divide by 2 to become 0 + if (numRows > 1) { + if (dvProd != 0) { + newNumRows = Math.min(numRows / 2, dvProd); + } else { + newNumRows = numRows / 2; + } + } + return newNumRows; + } + } + + /** + * JOIN operator can yield any of the following three cases
+ * <ul>
+ * <li>The values of the join keys are disjoint in the two relations, in which case
+ * T(RXS) = 0 (we need histograms for this)</li>
+ * <li>The join key is a primary key on relation R and a foreign key on relation S, in which
+ * case every tuple in S will have a matching tuple in R, so T(RXS) = T(S) (we need
+ * histograms for this)</li>
+ * <li>Both relations R and S have the same value for the join key, e.g., a boolean column
+ * with all true values, in which case T(RXS) = T(R) * T(S) (we need histograms for this;
+ * countDistinct = 1 and the same value)</li>
+ * </ul>
+ * <p>
+ * In the absence of histograms, we can use the following general case:
+ * <p>
+ * Single attribute:
+ * <p>
+ * T(RXS) = (T(R)*T(S)) / max(V(R,Y), V(S,Y)), where Y is the join attribute
+ * <p>
+ * Multiple attributes:
+ * <p>
+ * T(RXS) = (T(R)*T(S)) / (max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2))), where y1 and y2
+ * are the join attributes
+ * <p>
+ * For more information, refer to the 'Estimating The Cost Of Operations' chapter in
+ * "Database Systems: The Complete Book" by Garcia-Molina et al.
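+ * <p>
+ * As an illustration (hypothetical numbers, not part of this patch): joining R and S on a
+ * single attribute Y with T(R) = 1000, T(S) = 200, V(R,Y) = 50 and V(S,Y) = 100 gives
+ * T(RXS) = (1000 * 200) / max(50, 100) = 2000 rows.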

    + */ + public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + CommonJoinOperator jop = (CommonJoinOperator) nd; + List> parents = jop.getParentOperators(); + AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; + HiveConf conf = aspCtx.getConf(); + boolean allStatsAvail = true; + boolean allSatisfyPreCondition = true; + + for (Operator op : parents) { + if (op.getStatistics() == null) { + allStatsAvail = false; + } + } + + if (allStatsAvail) { + + for (Operator op : parents) { + if (!satisfyPrecondition(op.getStatistics())) { + allSatisfyPreCondition = false; + } + } + + if (allSatisfyPreCondition) { + // statistics object that is combination of statistics from all relations involved in JOIN + Statistics stats = new Statistics(); + long prodRows = 1; + List distinctVals = Lists.newArrayList(); + boolean multiAttr = false; + + + Map joinedColStats = Maps.newHashMap(); + Map> joinKeys = Maps.newHashMap(); + + // get the join keys from parent ReduceSink operators + for (int pos = 0; pos < parents.size(); pos++) { + ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); + + Statistics parentStats = parent.getStatistics(); + prodRows *= parentStats.getNumRows(); + List keyExprs = parent.getConf().getKeyCols(); + + // multi-attribute join key + if (keyExprs.size() > 1) { + multiAttr = true; + } + + // compute fully qualified join key column names. this name will be used to + // quickly look-up for column statistics of join key. + // TODO: expressions in join condition will be ignored. assign internal name + // for expressions and estimate column statistics for expression. + List fqCols = StatsUtils.getFullQualifedColNameFromExprs(keyExprs, + parent.getColumnExprMap()); + joinKeys.put(pos, fqCols); + + Map colExprMap = parent.getColumnExprMap(); + RowSchema rs = parent.getSchema(); + + // get column statistics for all output columns + List cs = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, + colExprMap, rs); + for (ColStatistics c : cs) { + if (c != null) { + joinedColStats.put(c.getFullyQualifiedColName(), c); + } + } + + // since new statistics is derived from all relations involved in JOIN, + // we need to update the state information accordingly + stats.updateBasicStatsState(parentStats.getBasicStatsState()); + stats.updateColumnStatsState(parentStats.getColumnStatsState()); + } + + // compute denominator i.e, max(V(R,Y), V(S,Y)) in case of single attribute join. 
+ // else max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)) in case of multi-attribute join + long denom = 1; + if (multiAttr) { + List perAttrDVs = Lists.newArrayList(); + int numAttr = joinKeys.get(0).size(); + for (int idx = 0; idx < numAttr; idx++) { + for (Integer i : joinKeys.keySet()) { + String col = joinKeys.get(i).get(idx); + ColStatistics cs = joinedColStats.get(col); + if (cs != null) { + perAttrDVs.add(cs.getCountDistint()); + } + } + distinctVals.add(getDenominator(perAttrDVs)); + perAttrDVs.clear(); + } + + for (Long l : distinctVals) { + denom *= l; + } + } else { + for (List jkeys : joinKeys.values()) { + for (String jk : jkeys) { + ColStatistics cs = joinedColStats.get(jk); + if (cs != null) { + distinctVals.add(cs.getCountDistint()); + } + } + } + denom = getDenominator(distinctVals); + } + + // column statistics from different sources are put together and rename + // fully qualified column names based on output schema of join operator + Map colExprMap = jop.getColumnExprMap(); + RowSchema rs = jop.getSchema(); + List outColStats = Lists.newArrayList(); + for (ColumnInfo ci : rs.getSignature()) { + String key = ci.getInternalName(); + ExprNodeDesc end = colExprMap.get(key); + if (end instanceof ExprNodeColumnDesc) { + String colName = ((ExprNodeColumnDesc) end).getColumn(); + colName = StatsUtils.stripPrefixFromColumnName(colName); + String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias(); + String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); + ColStatistics cs = joinedColStats.get(fqColName); + String outColName = key; + String outTabAlias = ci.getTabAlias(); + outColName = StatsUtils.stripPrefixFromColumnName(outColName); + if (cs != null) { + cs.setColumnName(outColName); + cs.setTableAlias(outTabAlias); + } + outColStats.add(cs); + } + } + + // update join statistics + stats.setColumnStats(outColStats); + long newRowCount = prodRows / denom; + stats.setNumRows(newRowCount); + stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats)); + jop.setStatistics(stats); + } + } + return null; + } + + private long getDenominator(List distinctVals) { + + // simple join from 2 relations + // denom = max(v1, v2) + if (distinctVals.size() <= 2) { + return Collections.max(distinctVals); + } else { + + // join from multiple relations + // denom = max(v1, v2) * max(v2, v3) * max(v3, v4) + long denom = 1; + for (int i = 0; i < distinctVals.size() - 1; i++) { + long v1 = distinctVals.get(i); + long v2 = distinctVals.get(i + 1); + if (v1 >= v2) { + denom *= v1; + } else { + denom *= v2; + } + } + return denom; + } + } + + } + + /** + * LIMIT operator changes the number of rows and thereby the data size. + * + */ + public static class LimitStatsRule extends DefaultStatsRule implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException {
+ LimitOperator lop = (LimitOperator) nd;
+ Operator parent = lop.getParentOperators().get(0);
+ Statistics parentStats = parent.getStatistics();
+
+ try {
+ if (satisfyPrecondition(parentStats)) {
+ Statistics stats = parentStats.clone();
+ long limit = lop.getConf().getLimit();
+ if (limit == -1) {
+ limit = lop.getConf().getLeastRows();
+ }
+
+ // if the limit is greater than the available rows then do not update statistics
+ if (limit <= parentStats.getNumRows()) {
+ updateStats(stats, limit);
+ }
+ lop.setStatistics(stats);
+ } else {
+ if (parentStats != null) {
+ lop.setStatistics(parentStats.clone());
+ }
+ }
+ } catch (CloneNotSupportedException e) {
+ throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ }
+ return null;
+ }
+
+ }
+
+ /**
+ * Default rule is to aggregate the statistics from all its parent operators.
+ *
+ */
+ public static class DefaultStatsRule implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ Operator op = (Operator) nd;
+ OperatorDesc conf = op.getConf();
+
+ if (conf != null) {
+ Statistics stats = conf.getStatistics();
+ if (stats == null) {
+ if (op.getParentOperators() != null) {
+
+ // if a parent's statistics are null then that branch of the tree has not been
+ // walked yet. don't update the stats until all branches are walked
+ if (isAllParentsContainStatistics(op)) {
+ stats = new Statistics();
+ for (Operator parent : op.getParentOperators()) {
+ if (parent.getStatistics() != null) {
+ Statistics parentStats = parent.getStatistics();
+ stats.addToNumRows(parentStats.getNumRows());
+ stats.addToDataSize(parentStats.getDataSize());
+ stats.updateBasicStatsState(parentStats.getBasicStatsState());
+ stats.updateColumnStatsState(parentStats.getColumnStatsState());
+ stats.addToColumnStats(parentStats.getColumnStats());
+ op.getConf().setStatistics(stats);
+ }
+ }
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ // check if all parent statistics are available
+ private boolean isAllParentsContainStatistics(Operator op) {
+ for (Operator parent : op.getParentOperators()) {
+ if (parent.getStatistics() == null) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ }
+
+ public static NodeProcessor getTableScanRule() {
+ return new TableScanStatsRule();
+ }
+
+ public static NodeProcessor getSelectRule() {
+ return new SelectStatsRule();
+ }
+
+ public static NodeProcessor getFilterRule() {
+ return new FilterStatsRule();
+ }
+
+ public static NodeProcessor getGroupByRule() {
+ return new GroupByStatsRule();
+ }
+
+ public static NodeProcessor getJoinRule() {
+ return new JoinStatsRule();
+ }
+
+ public static NodeProcessor getLimitRule() {
+ return new LimitStatsRule();
+ }
+
+ public static NodeProcessor getDefaultRule() {
+ return new DefaultStatsRule();
+ }
+
+ /**
+ * Update the basic statistics of the statistics object based on the new row count
+ *
+ * @param stats
+ * - statistics to be updated
+ * @param newNumRows
+ * - new number of rows
+ */
+ static void updateStats(Statistics stats, long newNumRows) {
+ long oldRowCount = stats.getNumRows();
+ double ratio = (double) newNumRows / (double) oldRowCount;
+ stats.setNumRows(newNumRows);
+
+ List colStats = stats.getColumnStats();
+ for (ColStatistics cs : colStats) {
+ long oldNumNulls = cs.getNumNulls();
+ long oldDV = cs.getCountDistint();
+ long newNumNulls = Math.round(ratio * oldNumNulls);
+ long newDV = oldDV;
+
+ // if ratio is greater 
than 1, then number of rows increases. This can happen + // when some operators like GROUPBY duplicates the input rows in which case + // number of distincts should not change. Update the distinct count only when + // the output number of rows is less than input number of rows. + if (ratio <= 1.0) { + newDV = Math.round(ratio * oldDV); + } + cs.setNumNulls(newNumNulls); + cs.setCountDistint(newDV); + } + stats.setColumnStats(colStats); + long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats); + stats.setDataSize(newDataSize); + } + + static boolean satisfyPrecondition(Statistics stats) { + return stats != null && stats.getBasicStatsState().equals(Statistics.State.COMPLETE) + && !stats.getColumnStatsState().equals(Statistics.State.NONE); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index c096a65..24694ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -21,6 +21,18 @@ public class AbstractOperatorDesc implements OperatorDesc { private boolean vectorMode = false; + protected transient Statistics statistics; + + @Override + @Explain(displayName = "Statistics", normalExplain = false) + public Statistics getStatistics() { + return statistics; + } + + @Override + public void setStatistics(Statistics statistics) { + this.statistics = statistics; + } @Override public Object clone() throws CloneNotSupportedException { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java new file mode 100644 index 0000000..0749dc0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import org.apache.hadoop.hive.ql.stats.StatsUtils; + + +public class ColStatistics { + + private String tabAlias; + private String colName; + private String colType; + private String fqColName; + private long countDistint; + private long numNulls; + private double avgColLen; + private long numTrues; + private long numFalses; + + public ColStatistics(String tabAlias, String colName, String colType) { + this.setTableAlias(tabAlias); + this.setColumnName(colName); + this.setColumnType(colType); + this.setFullyQualifiedColName(StatsUtils.getFullyQualifiedColumnName(tabAlias, colName)); + } + + public ColStatistics() { + this(null, null, null); + } + + public String getColumnName() { + return colName; + } + + public void setColumnName(String colName) { + this.colName = colName; + this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); + } + + public String getColumnType() { + return colType; + } + + public void setColumnType(String colType) { + this.colType = colType; + } + + public long getCountDistint() { + return countDistint; + } + + public void setCountDistint(long countDistint) { + this.countDistint = countDistint; + } + + public long getNumNulls() { + return numNulls; + } + + public void setNumNulls(long numNulls) { + this.numNulls = numNulls; + } + + public double getAvgColLen() { + return avgColLen; + } + + public void setAvgColLen(double avgColLen) { + this.avgColLen = avgColLen; + } + + public String getFullyQualifiedColName() { + return fqColName; + } + + public void setFullyQualifiedColName(String fqColName) { + this.fqColName = fqColName; + } + + public String getTableAlias() { + return tabAlias; + } + + public void setTableAlias(String tabName) { + this.tabAlias = tabName; + this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabName, colName); + } + + public long getNumTrues() { + return numTrues; + } + + public void setNumTrues(long numTrues) { + this.numTrues = numTrues; + } + + public long getNumFalses() { + return numFalses; + } + + public void setNumFalses(long numFalses) { + this.numFalses = numFalses; + } + + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" fqColName: "); + sb.append(fqColName); + sb.append(" colName: "); + sb.append(colName); + sb.append(" colType: "); + sb.append(colType); + sb.append(" countDistincts: "); + sb.append(countDistint); + sb.append(" numNulls: "); + sb.append(numNulls); + sb.append(" avgColLen: "); + sb.append(avgColLen); + sb.append(" numTrues: "); + sb.append(numTrues); + sb.append(" numFalses: "); + sb.append(numFalses); + return sb.toString(); + } + + @Override + public ColStatistics clone() throws CloneNotSupportedException { + ColStatistics clone = new ColStatistics(tabAlias, colName, colType); + clone.setFullyQualifiedColName(fqColName); + clone.setAvgColLen(avgColLen); + clone.setCountDistint(countDistint); + clone.setNumNulls(numNulls); + clone.setNumTrues(numTrues); + clone.setNumFalses(numFalses); + return clone; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java index 36757e8..6c2efaf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java @@ -22,4 +22,6 @@ public interface OperatorDesc extends Serializable, Cloneable { public Object clone() throws CloneNotSupportedException; + public Statistics getStatistics(); + public void 
setStatistics(Statistics statistics); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java new file mode 100644 index 0000000..a16c8ff --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.ql.stats.StatsUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * Statistics. Describes the output of an operator in terms of size, rows, etc + * based on estimates. + */ +@SuppressWarnings("serial") +public class Statistics implements Serializable { + + public enum State { + COMPLETE, PARTIAL, NONE + } + + private long numRows; + private long dataSize; + private State basicStatsState; + private Map columnStats; + private State columnStatsState; + + public Statistics() { + this(0, 0); + } + + public Statistics(long nr, long ds) { + this.numRows = nr; + this.dataSize = ds; + this.basicStatsState = State.NONE; + this.columnStats = null; + this.columnStatsState = State.NONE; + } + + public long getNumRows() { + return numRows; + } + + public void setNumRows(long numRows) { + this.numRows = numRows; + } + + public long getDataSize() { + return dataSize; + } + + public void setDataSize(long dataSize) { + this.dataSize = dataSize; + } + + public State getBasicStatsState() { + return basicStatsState; + } + + public void setBasicStatsState(State basicStatsState) { + this.basicStatsState = basicStatsState; + } + + public State getColumnStatsState() { + return columnStatsState; + } + + public void setColumnStatsState(State columnStatsState) { + this.columnStatsState = columnStatsState; + } + + @Override + @Explain(displayName = "") + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" numRows: "); + sb.append(numRows); + sb.append(" dataSize: "); + sb.append(dataSize); + sb.append(" basicStatsState: "); + sb.append(basicStatsState); + sb.append(" colStatsState: "); + sb.append(columnStatsState); + return sb.toString(); + } + + @Override + public Statistics clone() throws CloneNotSupportedException { + Statistics clone = new Statistics(numRows, dataSize); + clone.setBasicStatsState(basicStatsState); + clone.setColumnStatsState(columnStatsState); + if (columnStats != null) { + Map cloneColStats = Maps.newHashMap(); + for (Map.Entry entry : columnStats.entrySet()) { + cloneColStats.put(entry.getKey(), entry.getValue().clone()); + } + clone.setColumnStats(cloneColStats); + } + return clone; + } + + public void addToNumRows(long nr) { + numRows += nr; + } + + public void 
addToDataSize(long rds) { + dataSize += rds; + } + + public void setColumnStats(Map colStats) { + this.columnStats = colStats; + } + + public void setColumnStats(List colStats) { + columnStats = Maps.newHashMap(); + addToColumnStats(colStats); + } + + public void addToColumnStats(List colStats) { + + if (columnStats == null) { + columnStats = Maps.newHashMap(); + } + + if (colStats != null) { + for (ColStatistics cs : colStats) { + ColStatistics updatedCS = null; + if (cs != null) { + + String key = cs.getFullyQualifiedColName(); + // if column statistics for a column is already found then merge the statistics + if (columnStats.containsKey(key) && columnStats.get(key) != null) { + updatedCS = columnStats.get(key); + updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen())); + updatedCS.setNumNulls(updatedCS.getNumNulls() + cs.getNumNulls()); + updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); + columnStats.put(key, updatedCS); + } else { + columnStats.put(key, cs); + } + } + } + } + } + + // newState + // ----------------------------------------- + // basicStatsState | COMPLETE PARTIAL NONE | + // |________________________________________| + // COMPLETE | COMPLETE PARTIAL PARTIAL | + // PARTIAL | PARTIAL PARTIAL PARTIAL | + // NONE | COMPLETE PARTIAL NONE | + // ----------------------------------------- + public void updateBasicStatsState(State newState) { + if (newState.equals(State.PARTIAL)) { + basicStatsState = State.PARTIAL; + } + + if (newState.equals(State.NONE)) { + if (basicStatsState.equals(State.NONE)) { + basicStatsState = State.NONE; + } else { + basicStatsState = State.PARTIAL; + } + } + + if (newState.equals(State.COMPLETE)) { + if (basicStatsState.equals(State.PARTIAL)) { + basicStatsState = State.PARTIAL; + } else { + basicStatsState = State.COMPLETE; + } + } + } + + // similar to the table above for basic stats + public void updateColumnStatsState(State newState) { + if (newState.equals(State.PARTIAL)) { + columnStatsState = State.PARTIAL; + } + + if (newState.equals(State.NONE)) { + if (columnStatsState.equals(State.NONE)) { + columnStatsState = State.NONE; + } else { + columnStatsState = State.PARTIAL; + } + } + + if (newState.equals(State.COMPLETE)) { + if (columnStatsState.equals(State.PARTIAL)) { + columnStatsState = State.PARTIAL; + } else { + columnStatsState = State.COMPLETE; + } + } + } + + public long getAvgRowSize() { + if (basicStatsState.equals(State.COMPLETE) && numRows != 0) { + return dataSize / numRows; + } + + return 0; + } + + public ColStatistics getColumnStatisticsFromFQColName(String fqColName) { + return columnStats.get(fqColName); + } + + public ColStatistics getColumnStatisticsFromColName(String colName) { + for (ColStatistics cs : columnStats.values()) { + if (cs.getColumnName().equalsIgnoreCase(colName)) { + return cs; + } + } + + return null; + } + + public ColStatistics getColumnStatisticsForColumn(String tabAlias, String colName) { + String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); + return getColumnStatisticsFromFQColName(fqColName); + } + + public List getColumnStats() { + if (columnStats != null) { + return Lists.newArrayList(columnStats.values()); + } + return null; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java new file mode 100644 index 0000000..13b2ffe --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -0,0 +1,1252 @@ +package 
org.apache.hadoop.hive.ql.stats; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Lists;
+
+public class StatsUtils {
+
+  /**
+   * Collect table, partition and column level statistics
+   *
+   * @param conf
+   *          - hive configuration
+   * @param partList
+   *          - partition list
+   * @param table
+   *          - table
+   * @param tableScanOperator
+   *          - table scan operator
+   * @return statistics object
+   * @throws HiveException
+   */
+  public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
+      Table table, TableScanOperator tableScanOperator) {
+
+    Statistics stats = new Statistics();
+
+    // column level statistics are required only for the columns that are needed
+    List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
+    List<String> neededColumns = tableScanOperator.getNeededColumns();
+    String dbName = table.getDbName();
+    String tabName = table.getTableName();
+
+    if (!table.isPartitioned()) {
+      long nr = getNumRows(dbName, tabName);
+      long rds = getRawDataSize(dbName, tabName);
+      if (rds <= 0) {
+        rds = getTotalSize(dbName, tabName);
+
+        // if data size is still 0 then get file size
+        if (rds <= 0) {
+          rds = getFileSizeForTable(conf, table);
+        }
+      }
+
+      // if basic stats are not available then return
+      if (nr <= 0 && rds <= 0) {
+        stats.setBasicStatsState(Statistics.State.NONE);
+        return stats;
+      }
+
+      // if any basic stat is missing, mark it as partial stats
+      if (nr <= 0 || rds <= 0) {
+        stats.setBasicStatsState(Statistics.State.PARTIAL);
+      }
+
+      // if both are available then we have complete basic stats
+      if (nr > 0 && rds > 0) {
+        stats.setBasicStatsState(Statistics.State.COMPLETE);
+      }
+
+      // a row count of -1 means that the statistics from the metastore are not reliable
+      if (nr <= 0) {
+        nr = 0;
+      }
+      stats.setNumRows(nr);
+      stats.setDataSize(rds);
+
+      List<ColStatistics> colStats = getTableColumnStats(table, schema, neededColumns);
+
+      // if column stats are available and at least one column doesn't have stats
+      // then mark it as partial
+      if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) {
+        stats.setColumnStatsState(Statistics.State.PARTIAL);
+      }
+
+      // if column stats are available and all columns have stats then mark it
+      // as complete
+      if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) {
+        stats.setColumnStatsState(Statistics.State.COMPLETE);
+      }
+
+      if (!checkIfColStatsAvailable(colStats)) {
+        // if there is column projection and we do not have stats then mark
+        // it as NONE. Else we will have stats for const/udf columns
+        if (!neededColumns.isEmpty()) {
+          stats.setColumnStatsState(Statistics.State.NONE);
+        } else {
+          stats.setColumnStatsState(Statistics.State.COMPLETE);
+        }
+        stats.addToColumnStats(null);
+      } else {
+        // set col stats and mark it as table level col stats
+        stats.addToColumnStats(colStats);
+      }
+    } else {
+
+      // For partitioned tables, get the size of all the partitions after pruning
+      // the partitions that are not required
+      if (partList != null) {
+        List<String> partNames = Lists.newArrayList();
+        for (Partition part : partList.getNotDeniedPartns()) {
+          partNames.add(part.getName());
+        }
+
+        List<Long> rowCounts = getBasicStatForPartitions(table, partNames,
+            StatsSetupConst.ROW_COUNT);
+        List<Long> dataSizes = getBasicStatForPartitions(table, partNames,
+            StatsSetupConst.RAW_DATA_SIZE);
+
+        long nr = getSumIgnoreNegatives(rowCounts);
+        long rds = getSumIgnoreNegatives(dataSizes);
+        if (rds <= 0) {
+          dataSizes = getBasicStatForPartitions(table, partNames, StatsSetupConst.TOTAL_SIZE);
+          rds = getSumIgnoreNegatives(dataSizes);
+
+          // if the data size still could not be determined, then fall back to the
+          // filesystem to get file sizes
+          if (rds <= 0) {
+            dataSizes = getFileSizeForPartitions(conf, partList.getNotDeniedPartns());
+          }
+          rds = getSumIgnoreNegatives(dataSizes);
+        }
+
+        // basic stats
+        if (nr <= 0 && rds <= 0) {
+          stats.updateBasicStatsState(Statistics.State.NONE);
+        } else if (nr <= 0 || rds <= 0) {
+          stats.updateBasicStatsState(Statistics.State.PARTIAL);
+        } else {
+          if (containsNonPositives(rowCounts) || containsNonPositives(dataSizes)) {
+            stats.updateBasicStatsState(Statistics.State.PARTIAL);
+          } else {
+            stats.updateBasicStatsState(Statistics.State.COMPLETE);
+          }
+        }
+
+        // a row count of -1 means that the statistics from the metastore are not reliable
+        if (nr <= 0) {
+          nr = 0;
+        }
+        stats.addToNumRows(nr);
+        stats.addToDataSize(rds);
+
+        // column stats
+        for (Partition part : partList.getNotDeniedPartns()) {
+          List<ColStatistics> colStats = getPartitionColumnStats(table, part, schema, neededColumns);
+          if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) {
+            stats.updateColumnStatsState(Statistics.State.PARTIAL);
+          } else if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) {
+            stats.updateColumnStatsState(Statistics.State.COMPLETE);
+          } else {
+            // if there is column projection and we do not have stats then mark
+            // it as NONE. Else we will have stats for const/udf columns
+            if (!neededColumns.isEmpty()) {
+              stats.updateColumnStatsState(Statistics.State.NONE);
+            } else {
+              stats.updateColumnStatsState(Statistics.State.COMPLETE);
+            }
+          }
+          stats.addToColumnStats(colStats);
+        }
+      }
+    }
+
+    return stats;
+
+  }
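+
+  // A condensed sketch of the size-resolution order used above for the
+  // non-partitioned case; illustrative only (the method name is ours), built
+  // from the same helpers defined later in this class.
+  private static long resolveRawDataSizeSketch(HiveConf conf, Table table,
+      String dbName, String tabName) {
+    long rds = getRawDataSize(dbName, tabName);  // 1. rawDataSize from the metastore
+    if (rds <= 0) {
+      rds = getTotalSize(dbName, tabName);       // 2. fall back to the totalSize parameter
+    }
+    if (rds <= 0) {
+      rds = getFileSizeForTable(conf, table);    // 3. fall back to bytes on the filesystem
+    }
+    // with rows <= 0 and size <= 0 the caller marks the basic stats state as NONE
+    return rds;
+  }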
+
+  /**
+   * Find the bytes on disk occupied by a table
+   *
+   * @param conf
+   *          - hive conf
+   * @param table
+   *          - table
+   * @return size on disk
+   */
+  public static long getFileSizeForTable(HiveConf conf, Table table) {
+    Path path = table.getPath();
+    long size = 0;
+    try {
+      FileSystem fs = path.getFileSystem(conf);
+      size = fs.getContentSummary(path).getLength();
+    } catch (Exception e) {
+      size = 0;
+    }
+    return size;
+  }
+
+  /**
+   * Find the bytes on disk occupied by a list of partitions
+   *
+   * @param conf
+   *          - hive conf
+   * @param parts
+   *          - partition list
+   * @return sizes of partitions
+   */
+  public static List<Long> getFileSizeForPartitions(HiveConf conf, List<Partition> parts) {
+    List<Long> sizes = Lists.newArrayList();
+    for (Partition part : parts) {
+      Path path = part.getPartitionPath();
+      long size = 0;
+      try {
+        FileSystem fs = path.getFileSystem(conf);
+        size = fs.getContentSummary(path).getLength();
+      } catch (Exception e) {
+        size = 0;
+      }
+      sizes.add(size);
+    }
+    return sizes;
+  }
+
+  private static boolean containsNonPositives(List<Long> vals) {
+    for (Long val : vals) {
+      if (val <= 0L) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Get the sum of all values in the list that are > 0
+   *
+   * @param vals
+   *          - list of values
+   * @return sum
+   */
+  public static long getSumIgnoreNegatives(List<Long> vals) {
+    long result = 0;
+    for (Long l : vals) {
+      if (l > 0) {
+        result += l;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Get the partition level column statistics from the metastore for all the needed columns
+   *
+   * @param table
+   *          - table object
+   * @param part
+   *          - partition object
+   * @param schema
+   *          - output schema
+   * @param neededColumns
+   *          - list of needed columns
+   * @return column statistics
+   */
+  public static List<ColStatistics> getPartitionColumnStats(Table table, Partition part,
+      List<ColumnInfo> schema, List<String> neededColumns) {
+
+    String dbName = table.getDbName();
+    String tabName = table.getTableName();
+    String partName = part.getName();
+    List<ColStatistics> colStatistics = Lists.newArrayList();
+    for (ColumnInfo col : schema) {
+      if (!col.isHiddenVirtualCol()) {
+        String colName = col.getInternalName();
+        if (neededColumns.contains(colName)) {
+          String tabAlias = col.getTabAlias();
+          ColStatistics cs = getPartitionColumnStatsForColumn(dbName, tabName, partName, colName);
+          if (cs != null) {
+            cs.setTableAlias(tabAlias);
+          }
+          colStatistics.add(cs);
+        }
+      }
+    }
+    return colStatistics;
+  }
+
+  /**
+   * Get the partition level column statistics from the metastore for a specific column
+   *
+   * @param dbName
+   *          - database name
+   * @param tabName
+   *          - table name
+   * @param partName
+   *          - partition name
+   * @param colName
+   *          - column name
+   * @return column statistics
+   */
+  public static ColStatistics getPartitionColumnStatsForColumn(String dbName, String tabName,
+      String partName, String colName) {
+    try {
+      ColumnStatistics colStats = Hive.get().getPartitionColumnStatistics(dbName, tabName,
+          partName, colName);
+      if (colStats != null) {
+        return getColStatistics(colStats.getStatsObj().get(0), tabName, colName);
+      }
+    } catch (HiveException e) {
+      return null;
+    }
+    return null;
+  }
+
+  /**
+   * Will return true if column statistics for at least one column are available
+   *
+   * @param colStats
+   *          - column stats
+   * @return true if at least one column has statistics
+   */
+  private static
boolean checkIfColStatsAvailable(List colStats) { + for (ColStatistics cs : colStats) { + if (cs != null) { + return true; + } + } + return false; + } + + /** + * Get table level column stats for specified column + * + * @param dbName + * - database name + * @param tableName + * - table name + * @param colName + * - column name + * @return column stats + */ + public static ColStatistics getTableColumnStatsForColumn(String dbName, String tableName, + String colName) { + try { + ColumnStatistics colStat = Hive.get().getTableColumnStatistics(dbName, tableName, colName); + if (colStat != null) { + // there will be only one column statistics object + return getColStatistics(colStat.getStatsObj().get(0), tableName, colName); + } + } catch (HiveException e) { + return null; + } + return null; + } + + /** + * Convert ColumnStatisticsObj to ColStatistics + * + * @param cso + * - ColumnStatisticsObj + * @param tabName + * - table name + * @param colName + * - column name + * @return ColStatistics + */ + public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName, + String colName) { + ColStatistics cs = new ColStatistics(tabName, colName, cso.getColType()); + String colType = cso.getColType(); + ColumnStatisticsData csd = cso.getStatsData(); + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) + || colType.startsWith(serdeConstants.CHAR_TYPE_NAME) + || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { + cs.setCountDistint(csd.getStringStats().getNumDVs()); + cs.setNumNulls(csd.getStringStats().getNumNulls()); + cs.setAvgColLen(csd.getStringStats().getAvgColLen()); + } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) { + cs.setCountDistint(2); + } else { + cs.setCountDistint(1); + } + cs.setNumTrues(csd.getBooleanStats().getNumTrues()); + cs.setNumFalses(csd.getBooleanStats().getNumFalses()); + cs.setNumNulls(csd.getBooleanStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + cs.setAvgColLen(csd.getBinaryStats().getAvgColLen()); + cs.setNumNulls(csd.getBinaryStats().getNumNulls()); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + 
cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); + } else { + // Columns statistics for complex datatypes are not supported yet + return null; + } + return cs; + } + + /** + * Get table level column statistics from metastore for needed columns + * + * @param table + * - table + * @param schema + * - output schema + * @param neededColumns + * - list of needed columns + * @return column statistics + */ + public static List getTableColumnStats(Table table, List schema, + List neededColumns) { + + String dbName = table.getDbName(); + String tabName = table.getTableName(); + List colStatistics = Lists.newArrayList(); + for (ColumnInfo col : schema) { + if (!col.isHiddenVirtualCol()) { + String colName = col.getInternalName(); + if (neededColumns.contains(colName)) { + String tabAlias = col.getTabAlias(); + ColStatistics cs = getTableColumnStatsForColumn(dbName, tabName, colName); + if (cs != null) { + cs.setTableAlias(tabAlias); + } + colStatistics.add(cs); + } + } + } + return colStatistics; + } + + /** + * Get the raw data size of variable length data types + * + * @param conf + * - hive conf + * @param oi + * - object inspector + * @param colType + * - column type + * @return raw data size + */ + public static long getAvgColLenOfVariableLengthTypes(HiveConf conf, ObjectInspector oi, + String colType) { + + long configVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH); + + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + + // constant string projection Ex: select "hello" from table + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + + // if writable constant is null then return size 0 + if (coi.getWritableConstantValue() == null) { + return 0; + } + + return coi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableConstantStringObjectInspector) { + + // some UDFs return writable constant strings (fixed width) + // Ex: select upper("hello") from table + WritableConstantStringObjectInspector wcsoi = (WritableConstantStringObjectInspector) oi; + + return wcsoi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableStringObjectInspector) { + + // some UDFs may emit strings of variable length. like pattern matching + // UDFs. it's hard to find the length of such UDFs. 
+ // return the variable length from config + return configVarLen; + } + } else if (colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { + + // constant varchar projection + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + + // if writable constant is null then return size 0 + if (coi.getWritableConstantValue() == null) { + return 0; + } + + return coi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableConstantHiveVarcharObjectInspector) { + + WritableConstantHiveVarcharObjectInspector wcsoi = (WritableConstantHiveVarcharObjectInspector) oi; + return wcsoi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableHiveVarcharObjectInspector) { + return ((WritableHiveVarcharObjectInspector)oi).getMaxLength(); + } + } else if (colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) { + + // constant char projection + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + + // if writable constant is null then return size 0 + if (coi.getWritableConstantValue() == null) { + return 0; + } + + return coi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableConstantHiveCharObjectInspector) { + + WritableConstantHiveCharObjectInspector wcsoi = (WritableConstantHiveCharObjectInspector) oi; + return wcsoi.getWritableConstantValue().toString().length(); + } else if (oi instanceof WritableHiveCharObjectInspector) { + return ((WritableHiveCharObjectInspector) oi).getMaxLength(); + } + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + + // constant byte arrays + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + + // if writable constant is null then return size 0 + if (coi.getWritableConstantValue() == null) { + return 0; + } + + BytesWritable bw = ((BytesWritable) coi.getWritableConstantValue()); + return bw.getLength(); + } else if (oi instanceof WritableConstantBinaryObjectInspector) { + + // writable constant byte arrays + WritableConstantBinaryObjectInspector wcboi = (WritableConstantBinaryObjectInspector) oi; + + return wcboi.getWritableConstantValue().getLength(); + } else if (oi instanceof WritableBinaryObjectInspector) { + + // return the variable length from config + return configVarLen; + } + } else { + + // complex types (map, list, struct, union) + return getSizeOfComplexTypes(conf, oi); + } + + return 0; + } + + /** + * Get the size of complex data types + * + * @param conf + * - hive conf + * @param oi + * - object inspector + * @return raw data size + */ + public static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi) { + long result = 0; + int length = 0; + int listEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_LIST_NUM_ENTRIES); + int mapEntries = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_NUM_ENTRIES); + + switch (oi.getCategory()) { + case PRIMITIVE: + String colType = oi.getTypeName(); + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) + || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) + || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) { + int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType); + result += JavaDataModel.get().lengthForStringOfLength(avgColLen); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + int avgColLen = (int) getAvgColLenOfVariableLengthTypes(conf, oi, colType); + result 
+= JavaDataModel.get().lengthForByteArrayOfSize(avgColLen); + } else { + result += getAvgColLenOfFixedLengthTypes(colType); + } + break; + case LIST: + if (oi instanceof StandardConstantListObjectInspector) { + + // constant list projection of known length + StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi; + length = scloi.getWritableConstantValue().size(); + + // check if list elements are primitive or Objects + ObjectInspector leoi = scloi.getListElementObjectInspector(); + if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { + result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length); + } else { + result += JavaDataModel.get().lengthForObjectArrayOfSize(length); + } + } else { + StandardListObjectInspector sloi = (StandardListObjectInspector) oi; + + // list overhead + (configured number of element in list * size of element) + long elemSize = getSizeOfComplexTypes(conf, sloi.getListElementObjectInspector()); + result += JavaDataModel.get().arrayList() + (listEntries * elemSize); + } + break; + case MAP: + if (oi instanceof StandardConstantMapObjectInspector) { + + // constant map projection of known length + StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi; + result += getSizeOfMap(scmoi); + } else { + StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi; + result += getSizeOfComplexTypes(conf, smoi.getMapKeyObjectInspector()); + result += getSizeOfComplexTypes(conf, smoi.getMapValueObjectInspector()); + + // hash map overhead + result += JavaDataModel.get().hashMap(mapEntries); + } + break; + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + + // add constant object overhead for struct + result += JavaDataModel.get().object(); + + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + for (StructField field : soi.getAllStructFieldRefs()) { + result += getSizeOfComplexTypes(conf, field.getFieldObjectInspector()); + } + break; + case UNION: + UnionObjectInspector uoi = (UnionObjectInspector) oi; + + // add constant object overhead for union + result += JavaDataModel.get().object(); + + // add constant size for unions tags + result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1(); + for (ObjectInspector foi : uoi.getObjectInspectors()) { + result += getSizeOfComplexTypes(conf, foi); + } + break; + default: + break; + } + + return result; + } + + /** + * Get size of fixed length primitives + * + * @param colType + * - column type + * @return raw data size + */ + public static long getAvgColLenOfFixedLengthTypes(String colType) { + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + return JavaDataModel.get().primitive1(); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + return JavaDataModel.get().primitive2(); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + return JavaDataModel.get().lengthOfTimestamp(); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + return JavaDataModel.get().lengthOfDate(); + } else if 
(colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + return JavaDataModel.get().lengthOfDecimal(); + } else { + return 0; + } + } + + /** + * Get the size of arrays of primitive types + * + * @param colType + * - column type + * @param length + * - array length + * @return raw data size + */ + public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) { + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + return JavaDataModel.get().lengthForIntArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + return JavaDataModel.get().lengthForDoubleArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + return JavaDataModel.get().lengthForLongArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + return JavaDataModel.get().lengthForByteArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + return JavaDataModel.get().lengthForBooleanArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + return JavaDataModel.get().lengthForTimestampArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + return JavaDataModel.get().lengthForDateArrayOfSize(length); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + return JavaDataModel.get().lengthForDecimalArrayOfSize(length); + } else { + return 0; + } + } + + /** + * Estimate the size of map object + * + * @param scmoi + * - object inspector + * @return size of map + */ + public static long getSizeOfMap(StandardConstantMapObjectInspector scmoi) { + Map map = scmoi.getWritableConstantValue(); + ObjectInspector koi = scmoi.getMapKeyObjectInspector(); + ObjectInspector voi = scmoi.getMapValueObjectInspector(); + long result = 0; + for (Map.Entry entry : map.entrySet()) { + result += getWritableSize(koi, entry.getKey()); + result += getWritableSize(voi, entry.getValue()); + } + + // add additional overhead of each map entries + result += JavaDataModel.get().hashMap(map.entrySet().size()); + return result; + } + + /** + * Get size of primitive data types based on their respective writable object inspector + * + * @param oi + * - object inspector + * @param value + * - value + * @return raw data size + */ + public static long getWritableSize(ObjectInspector oi, Object value) { + if (oi instanceof WritableStringObjectInspector) { + WritableStringObjectInspector woi = (WritableStringObjectInspector) oi; + return JavaDataModel.get().lengthForStringOfLength( + woi.getPrimitiveWritableObject(value).getLength()); + } else if (oi instanceof WritableBinaryObjectInspector) { + WritableBinaryObjectInspector woi = (WritableBinaryObjectInspector) oi; + return JavaDataModel.get().lengthForByteArrayOfSize( + woi.getPrimitiveWritableObject(value).getLength()); + } else if (oi instanceof WritableBooleanObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableByteObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableDateObjectInspector) { + return JavaDataModel.get().lengthOfDate(); + } else if (oi instanceof WritableDoubleObjectInspector) { + return JavaDataModel.get().primitive2(); + } else if (oi instanceof 
WritableFloatObjectInspector) {
+      return JavaDataModel.get().primitive1();
+    } else if (oi instanceof WritableHiveDecimalObjectInspector) {
+      return JavaDataModel.get().lengthOfDecimal();
+    } else if (oi instanceof WritableIntObjectInspector) {
+      return JavaDataModel.get().primitive1();
+    } else if (oi instanceof WritableLongObjectInspector) {
+      return JavaDataModel.get().primitive2();
+    } else if (oi instanceof WritableShortObjectInspector) {
+      return JavaDataModel.get().primitive1();
+    } else if (oi instanceof WritableTimestampObjectInspector) {
+      return JavaDataModel.get().lengthOfTimestamp();
+    }
+
+    return 0;
+  }
+
+  /**
+   * Get column statistics from parent statistics.
+   *
+   * @param conf
+   *          - hive conf
+   * @param parentStats
+   *          - parent statistics
+   * @param colExprMap
+   *          - column expression map
+   * @param rowSchema
+   *          - row schema
+   * @return column statistics
+   */
+  public static List<ColStatistics> getColStatisticsFromExprMap(HiveConf conf,
+      Statistics parentStats,
+      Map<String, ExprNodeDesc> colExprMap, RowSchema rowSchema) {
+    List<ColStatistics> cs = Lists.newArrayList();
+    for (ColumnInfo ci : rowSchema.getSignature()) {
+      String outColName = ci.getInternalName();
+      String outTabAlias = ci.getTabAlias();
+      ExprNodeDesc end = colExprMap.get(outColName);
+      if (end == null) {
+        outColName = StatsUtils.stripPrefixFromColumnName(outColName);
+        end = colExprMap.get(outColName);
+      }
+      ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
+      if (colStat != null) {
+        outColName = StatsUtils.stripPrefixFromColumnName(outColName);
+        colStat.setColumnName(outColName);
+        colStat.setTableAlias(outTabAlias);
+      }
+      cs.add(colStat);
+    }
+    return cs;
+  }
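+
+  // The dispatch below reduces to three cases (a rough summary of the
+  // estimates used in this patch): a plain column reference inherits a clone
+  // of the parent's statistics for that column; a constant contributes a
+  // single distinct value (or numRows nulls for a null literal); and an
+  // opaque UDF is pessimistically assumed to produce a distinct value per
+  // row. For example, over a parent with numRows = 8 where "state" has 5
+  // distinct values:
+  //   state         (column)   -> clone of parent stats, countDistinct = 5
+  //   'OH'          (constant) -> countDistinct = 1, numNulls = 0
+  //   upper(state)  (UDF)      -> countDistinct = 8, one per row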
+  /**
+   * Get column statistics from expression nodes
+   *
+   * @param conf
+   *          - hive conf
+   * @param parentStats
+   *          - parent statistics
+   * @param end
+   *          - expression node
+   * @return column statistics
+   */
+  public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats,
+      ExprNodeDesc end) {
+
+    if (end == null) {
+      return null;
+    }
+
+    String colName = null;
+    String colType = null;
+    double avgColSize = 0;
+    long countDistincts = 0;
+    long numNulls = 0;
+    ObjectInspector oi = null;
+    long numRows = parentStats.getNumRows();
+    String tabAlias = null;
+
+    if (end instanceof ExprNodeColumnDesc) {
+      // column projection
+      ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
+      colName = encd.getColumn();
+      tabAlias = encd.getTabAlias();
+      colName = stripPrefixFromColumnName(colName);
+
+      if (encd.getIsPartitionColOrVirtualCol()) {
+
+        // virtual columns
+        colType = encd.getTypeInfo().getTypeName();
+        countDistincts = numRows;
+        oi = encd.getWritableObjectInspector();
+      } else {
+
+        // clone the column stats and return
+        ColStatistics result = parentStats.getColumnStatisticsForColumn(tabAlias, colName);
+        if (result != null) {
+          try {
+            return result.clone();
+          } catch (CloneNotSupportedException e) {
+            return null;
+          }
+        }
+        return null;
+      }
+    } else if (end instanceof ExprNodeConstantDesc) {
+
+      // constant projection
+      ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end;
+
+      // null projection
+      if (encd.getValue() == null) {
+        colName = encd.getName();
+        colType = "null";
+        numNulls = numRows;
+      } else {
+        colName = encd.getName();
+        colType = encd.getTypeString();
+        countDistincts = 1;
+        oi = encd.getWritableObjectInspector();
+      }
+    } else if (end instanceof ExprNodeGenericFuncDesc) {
+
+      // udf projection
+      ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
+      colName = engfd.getName();
+      colType = engfd.getTypeString();
+      countDistincts = numRows;
+      oi = engfd.getWritableObjectInspector();
+    } else if (end instanceof ExprNodeNullDesc) {
+
+      // null projection
+      ExprNodeNullDesc ennd = (ExprNodeNullDesc) end;
+      colName = ennd.getName();
+      colType = "null";
+      numNulls = numRows;
+    }
+
+    if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
+        || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)
+        || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+        || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
+        || colType.startsWith(serdeConstants.LIST_TYPE_NAME)
+        || colType.startsWith(serdeConstants.MAP_TYPE_NAME)
+        || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME)
+        || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) {
+      avgColSize = getAvgColLenOfVariableLengthTypes(conf, oi, colType);
+    } else {
+      avgColSize = getAvgColLenOfFixedLengthTypes(colType);
+    }
+
+    ColStatistics colStats = new ColStatistics(tabAlias, colName, colType);
+    colStats.setAvgColLen(avgColSize);
+    colStats.setCountDistint(countDistincts);
+    colStats.setNumNulls(numNulls);
+
+    return colStats;
+  }
+
+  /**
+   * Get the number of rows of a given table
+   *
+   * @param dbName
+   *          - database name
+   * @param tabName
+   *          - table name
+   * @return number of rows
+   */
+  public static long getNumRows(String dbName, String tabName) {
+    return getBasicStatForTable(dbName, tabName, StatsSetupConst.ROW_COUNT);
+  }
+
+  /**
+   * Get the raw data size of a given table
+   *
+   * @param dbName
+   *          - database name
+   * @param tabName
+   *          - table name
+   * @return raw data size
+   */
+  public static long getRawDataSize(String dbName, String tabName) {
+    return getBasicStatForTable(dbName, tabName, StatsSetupConst.RAW_DATA_SIZE);
+  }
+
+  /**
+   * Get the total size of a given table
+   *
+   * @param dbName
+   *          - database name
+   * @param tabName
+   *          - table name
+   * @return total size
+   */
+  public static long getTotalSize(String dbName, String tabName) {
+    return getBasicStatForTable(dbName, tabName, StatsSetupConst.TOTAL_SIZE);
+  }
+
+  /**
+   * Get basic stats of a table
+   *
+   * @param dbName
+   *          - database name
+   * @param tabName
+   *          - table name
+   * @param statType
+   *          - type of stats
+   * @return value of stats
+   */
+  public static long getBasicStatForTable(String dbName, String tabName, String statType) {
+
+    Table table;
+    try {
+      table = Hive.get().getTable(dbName, tabName);
+    } catch (HiveException e) {
+      return 0;
+    }
+
+    Map<String, String> params = table.getParameters();
+    long result = 0;
+
+    if (params != null) {
+      try {
+        result = Long.parseLong(params.get(statType));
+      } catch (NumberFormatException e) {
+        result = 0;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Get basic stats of partitions
+   *
+   * @param table
+   *          - table
+   * @param partNames
+   *          - partition names
+   * @param statType
+   *          - type of stats
+   * @return one stat value per partition
+   */
+  public static List<Long> getBasicStatForPartitions(Table table, List<String> partNames,
+      String statType) {
+
+    List<Long> stats = Lists.newArrayList();
+    List<Partition> parts;
+    try {
+      parts = Hive.get().getPartitionsByNames(table, partNames);
+    } catch (HiveException e1) {
+      return stats;
+    }
+
+    for (Partition part : parts) {
+      Map<String, String> params = part.getParameters();
+      long result = 0;
+      if (params != null) {
+        try {
+          result = Long.parseLong(params.get(statType));
+        } catch (NumberFormatException e) {
+          result = 0;
+        }
+      }
+      // add an entry for every partition so the list stays aligned with partNames
+      stats.add(result);
+    }
+    return stats;
+  }
+
+  /**
+   * Compute raw data size from column statistics
+   *
+   * @param numRows
+   *          - number of rows
+   * @param colStats
+   *          - column statistics
+   * @return raw data size
+   */
+  public static long getDataSizeFromColumnStats(long numRows, List<ColStatistics> colStats) {
+    long result = 0;
+
+    if (numRows <= 0) {
+      return result;
+    }
+
+    for (ColStatistics cs : colStats) {
+      if (cs != null) {
+        String colType = cs.getColumnType();
+        long nonNullCount = numRows - cs.getNumNulls();
+        if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME)
+            || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME)
+            || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)
+            || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)
+            || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)
+            || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)
+            || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
+
+          result += nonNullCount * cs.getAvgColLen();
+        } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
+            || colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)
+            || colType.startsWith(serdeConstants.CHAR_TYPE_NAME)) {
+
+          int acl = (int) Math.round(cs.getAvgColLen());
+          result += nonNullCount * JavaDataModel.get().lengthForStringOfLength(acl);
+        } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) {
+
+          int acl = (int) Math.round(cs.getAvgColLen());
+          result += nonNullCount * JavaDataModel.get().lengthForByteArrayOfSize(acl);
+        } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) {
+
+          result += nonNullCount * JavaDataModel.get().lengthOfTimestamp();
+        } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
+
+          result += nonNullCount * JavaDataModel.get().lengthOfDecimal();
+        } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
+
+          result += nonNullCount * JavaDataModel.get().lengthOfDate();
+        } else {
+
+          result += nonNullCount * cs.getAvgColLen();
+        }
+      }
+    }
+
+    return result;
+  }
+
+  /**
+   * Remove the KEY/VALUE prefix from a column name
+   *
+   * @param colName
+   *          - column name
+   * @return column name
+   */
+  public static String stripPrefixFromColumnName(String colName) {
+    String strippedName = colName;
+    if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) {
+      // strip off KEY./VALUE. from the column name
+      strippedName = colName.split("\\.")[1];
+    }
+    return strippedName;
+  }
+
+  /**
+   * Returns the fully qualified name of a column
+   *
+   * @param tabName
+   * @param colName
+   * @return fully qualified column name
+   */
+  public static String getFullyQualifiedColumnName(String tabName, String colName) {
+    return getFullyQualifiedName(null, tabName, colName);
+  }
+
+  /**
+   * Returns the fully qualified name of a column
+   *
+   * @param dbName
+   * @param tabName
+   * @param colName
+   * @return fully qualified column name
+   */
+  public static String getFullyQualifiedColumnName(String dbName, String tabName, String colName) {
+    return getFullyQualifiedName(dbName, tabName, colName);
+  }
+
+  /**
+   * Returns the fully qualified name of a column
+   *
+   * @param dbName
+   * @param tabName
+   * @param partName
+   * @param colName
+   * @return fully qualified column name
+   */
+  public static String getFullyQualifiedColumnName(String dbName, String tabName, String partName,
+      String colName) {
+    return getFullyQualifiedName(dbName, tabName, partName, colName);
+  }
+
+  private static String getFullyQualifiedName(String...
names) { + List nonNullAndEmptyNames = Lists.newArrayList(); + for (String name : names) { + if (name != null && !name.isEmpty()) { + nonNullAndEmptyNames.add(name); + } + } + return Joiner.on(".").join(nonNullAndEmptyNames); + } + + /** + * Try to get fully qualified column name from expression node + * + * @param keyExprs + * - expression nodes + * @param map + * - column expression map + * @return list of fully qualified names + */ + public static List getFullQualifedColNameFromExprs(List keyExprs, + Map map) { + List result = Lists.newArrayList(); + if (keyExprs != null) { + for (ExprNodeDesc end : keyExprs) { + String outColName = null; + for (Map.Entry entry : map.entrySet()) { + if (entry.getValue().isSame(end)) { + outColName = entry.getKey(); + } + } + if (end instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + if (outColName == null) { + outColName = encd.getColumn(); + } + String tabAlias = encd.getTabAlias(); + outColName = stripPrefixFromColumnName(outColName); + result.add(getFullyQualifiedColumnName(tabAlias, outColName)); + } else if (end instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end; + List cols = getFullQualifedColNameFromExprs(enf.getChildren(), map); + String joinedStr = Joiner.on(".").skipNulls().join(cols); + result.add(joinedStr); + } else if (end instanceof ExprNodeConstantDesc) { + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; + result.add(encd.getValue().toString()); + } + } + } + return result; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java index 9c3c4c0..3352a08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java +++ ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java @@ -267,6 +267,15 @@ public int lengthForIntArrayOfSize(int length) { public int lengthForBooleanArrayOfSize(int length) { return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length); } + public int lengthForTimestampArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length); + } + public int lengthForDateArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDate(), length); + } + public int lengthForDecimalArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length); + } public int lengthOfDecimal() { // object overhead + 8 bytes for intCompact + 4 bytes for precision diff --git ql/src/test/queries/clientpositive/annotate_stats_filter.q ql/src/test/queries/clientpositive/annotate_stats_filter.q new file mode 100644 index 0000000..0a645a6 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_filter.q @@ -0,0 +1,74 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH'; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- state column has 5 distincts. 
+-- the estimate for an equality predicate is numRows / countDistinct
+-- numRows: 1 rawDataSize: 102
+explain extended select * from loc_orc where state='OH';
+
+-- a not-equals comparison shouldn't affect the number of rows. rawDataSize is 792 and
+-- not 796 because of a rounding issue with avgColLen: avgColLen is kept as an integer,
+-- not a double.
+-- numRows: 8 rawDataSize: 804
+explain extended select * from loc_orc where state!='OH';
+explain extended select * from loc_orc where state<>'OH';
+
+-- nulls are treated as a constant equality comparison
+-- numRows: 1 rawDataSize: 102
+explain extended select * from loc_orc where zip is null;
+-- numRows: 1 rawDataSize: 102
+explain extended select * from loc_orc where !(zip is not null);
+
+-- not nulls are treated as the inverse of nulls
+-- numRows: 7 rawDataSize: 702
+explain extended select * from loc_orc where zip is not null;
+-- numRows: 7 rawDataSize: 702
+explain extended select * from loc_orc where !(zip is null);
+
+-- NOT evaluation. true will pass all rows, false will not pass any rows
+-- numRows: 8 rawDataSize: 804
+explain extended select * from loc_orc where !false;
+-- numRows: 0 rawDataSize: 0
+explain extended select * from loc_orc where !true;
+
+-- OR evaluation. 1 row for OH and 1 row for CA
+-- numRows: 2 rawDataSize: 204
+explain extended select * from loc_orc where state='OH' or state='CA';
+
+-- AND evaluation. the rules are applied in cascade: 8/2 = 4/2 = 2
+-- numRows: 2 rawDataSize: 204
+explain extended select * from loc_orc where year=2001 and year is null;
+-- numRows: 1 rawDataSize: 102
+explain extended select * from loc_orc where year=2001 and state='OH' and state='FL';
+
+-- AND and OR together. left expr will yield 1 row and right will yield 1 row
+-- numRows: 3 rawDataSize: 306
+explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA');
+
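+-- a rough summary of the filter heuristics exercised above, as reflected in the
+-- expected counts (not an exact cost model):
+--   equality (col = c)    -> numRows / countDistinct, e.g. 8/5 -> 1 for state='OH'
+--   IS NULL               -> treated like an equality match
+--   NOT false / NOT true  -> all rows / no rows
+--   OR                    -> sum of both sides, e.g. state='OH' or state='CA': 1 + 1 = 2
+--   AND                   -> conjunct rules applied in cascade, e.g. 8/2 = 4/2 = 2
+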
+-- AND and OR together. left expr will yield 8 rows and right will yield 1 row
+-- numRows: 1 rawDataSize: 102
+explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
+
+-- all inequality conditions (<, >, <=, >=) use the numRows/3 rule
+-- numRows: 2 rawDataSize: 204
+explain extended select * from loc_orc where locid < 30;
+explain extended select * from loc_orc where locid > 30;
+explain extended select * from loc_orc where locid <= 30;
+explain extended select * from loc_orc where locid >= 30;
diff --git ql/src/test/queries/clientpositive/annotate_stats_groupby.q ql/src/test/queries/clientpositive/annotate_stats_groupby.q
new file mode 100644
index 0000000..ce29d52
--- /dev/null
+++ ql/src/test/queries/clientpositive/annotate_stats_groupby.q
@@ -0,0 +1,55 @@
+create table if not exists loc_staging (
+  state string,
+  locid int,
+  zip bigint,
+  year int
+) row format delimited fields terminated by '|' stored as textfile;
+
+create table loc_orc like loc_staging;
+alter table loc_orc set fileformat orc;
+
+load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging;
+
+insert overwrite table loc_orc select * from loc_staging;
+
+-- numRows: 8 rawDataSize: 796
+explain extended select * from loc_orc;
+
+analyze table loc_orc compute statistics for columns state,locid,zip,year;
+
+-- only one distinct value in the year column + 1 NULL value
+-- map-side GBY: numRows: 8 (the map side will not do any reduction)
+-- reduce-side GBY: numRows: 2
+explain extended select year from loc_orc group by year;
+
+-- map-side GBY: numRows: 8
+-- reduce-side GBY: numRows: 4
+explain extended select state,locid from loc_orc group by state,locid;
+
+-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain extended select state,locid from loc_orc group by state,locid with cube;
+
+-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain extended select state,locid from loc_orc group by state,locid with rollup;
+
+-- map-side GBY numRows: 8 reduce-side GBY numRows: 4
+explain extended select state,locid from loc_orc group by state,locid grouping sets((state));
+
+-- map-side GBY numRows: 16 reduce-side GBY numRows: 8
+explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+
+-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
+explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+
+-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
+explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+
+set hive.stats.map.parallelism=10;
+
+-- map-side GBY: numRows: 80 (the map side will not do any reduction)
+-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
+explain extended select year from loc_orc group by year;
+
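+-- the map-side estimate scales the input rows by hive.stats.map.parallelism
+-- (8 * 10 = 80 above), and the reduce side is capped by the product of the
+-- grouping columns' distinct counts: numRows = min(mapSideRows / 2, prod(countDistinct)),
+-- e.g. for "group by year": min(80/2, 2) = 2
+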
+-- map-side GBY numRows: 320 reduce-side GBY numRows: 42
+-- Reason: numDistinct of state and locid are 6 and 7 respectively; numRows = min(320/2, 6*7) = 42
+explain extended select state,locid from loc_orc group by state,locid with cube;
+
diff --git ql/src/test/queries/clientpositive/annotate_stats_join.q ql/src/test/queries/clientpositive/annotate_stats_join.q
new file mode 100644
index 0000000..5683498
--- /dev/null
+++ ql/src/test/queries/clientpositive/annotate_stats_join.q
@@ -0,0 +1,80 @@
+create table if not exists emp_staging (
+  lastname string,
+  deptid int
+) row format delimited fields terminated by '|' stored as textfile;
+
+create table if not exists dept_staging (
+  deptid int,
+  deptname string
+) row format delimited fields terminated by '|' stored as textfile;
+
+create table if not exists loc_staging (
+  state string,
+  locid int,
+  zip bigint,
+  year int
+) row format delimited fields terminated by '|' stored as textfile;
+
+create table if not exists emp_orc like emp_staging;
+alter table emp_orc set fileformat orc;
+
+create table if not exists dept_orc like dept_staging;
+alter table dept_orc set fileformat orc;
+
+create table loc_orc like loc_staging;
+alter table loc_orc set fileformat orc;
+
+LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
+LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging;
+LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging;
+
+
+insert overwrite table emp_orc select * from emp_staging;
+insert overwrite table dept_orc select * from dept_staging;
+insert overwrite table loc_orc select * from loc_staging;
+
+analyze table emp_orc compute statistics for columns lastname,deptid;
+analyze table dept_orc compute statistics for columns deptname,deptid;
+analyze table loc_orc compute statistics for columns state,locid,zip,year;
+
+-- number of rows
+-- emp_orc  - 6
+-- dept_orc - 4
+-- loc_orc  - 8
+
+-- distinct counts for the relevant columns (NDV estimates are approximate, so in
+-- some cases the count distinct value will be greater than the number of rows)
+-- emp_orc.deptid    - 3
+-- emp_orc.lastname  - 7
+-- dept_orc.deptid   - 6
+-- dept_orc.deptname - 5
+-- loc_orc.locid     - 6
+-- loc_orc.state     - 7
+
+-- Expected output rows: 4
+-- Reason: #rows = (6*4)/max(3,6)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid);
+
+-- 3 way join
+-- Expected output rows: 4
+-- Reason: #rows = (6*4*6)/max(3,6)*max(6,3)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid);
+
+-- Expected output rows: 5
+-- Reason: #rows = (6*4*8)/max(3,6)*max(6,6)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid);
+
+-- join keys of different types
+-- Expected output rows: 4
+-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state);
+
+-- multi-attribute join
+-- Expected output rows: 0
+-- Reason: #rows = (6*4)/max(3,6)*max(7,5)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname);
+
+-- 3 way and multi-attribute join
+-- Expected output rows: 0
+-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
+explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state);
+
diff --git ql/src/test/queries/clientpositive/annotate_stats_limit.q
ql/src/test/queries/clientpositive/annotate_stats_limit.q new file mode 100644 index 0000000..e739326 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_limit.q @@ -0,0 +1,28 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state, locid, zip, year; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4; + +-- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16; + +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0; diff --git ql/src/test/queries/clientpositive/annotate_stats_part.q ql/src/test/queries/clientpositive/annotate_stats_part.q new file mode 100644 index 0000000..05f3a19 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_part.q @@ -0,0 +1,78 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; + +create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc; + +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table loc_orc partition(year) select * from loc_staging; + +-- stats are disabled. basic stats will report the file size but not raw data size. 
+-- so the initial statistics will be PARTIAL
+
+-- basicStatState: PARTIAL colStatState: NONE
+explain extended select * from loc_orc;
+
+-- partition level analyze statistics for a specific partition
+analyze table loc_orc partition(year=2001) compute statistics;
+
+-- basicStatState: PARTIAL colStatState: NONE
+explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+
+-- basicStatState: PARTIAL colStatState: NONE
+explain extended select * from loc_orc;
+
+-- basicStatState: COMPLETE colStatState: NONE
+explain extended select * from loc_orc where year=2001;
+
+-- partition level analyze statistics for all partitions
+analyze table loc_orc partition(year) compute statistics;
+
+-- basicStatState: COMPLETE colStatState: NONE
+explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+
+-- basicStatState: COMPLETE colStatState: NONE
+explain extended select * from loc_orc;
+
+-- basicStatState: COMPLETE colStatState: NONE
+explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__';
+
+-- both partitions will be pruned
+-- basicStatState: NONE colStatState: NONE
+explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__';
+
+-- partition level partial column statistics
+analyze table loc_orc partition(year=2001) compute statistics for columns state,locid;
+
+-- basicStatState: COMPLETE colStatState: NONE
+explain extended select zip from loc_orc;
+
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select state from loc_orc;
+
+-- column statistics for __HIVE_DEFAULT_PARTITION__ are not supported yet, hence colStatState reports PARTIAL
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select state,locid from loc_orc;
+
+-- basicStatState: COMPLETE colStatState: COMPLETE
+explain extended select state,locid from loc_orc where year=2001;
+
+-- basicStatState: COMPLETE colStatState: NONE
+explain extended select state,locid from loc_orc where year!=2001;
+
+-- basicStatState: COMPLETE colStatState: PARTIAL
+explain extended select * from loc_orc;
diff --git ql/src/test/queries/clientpositive/annotate_stats_select.q ql/src/test/queries/clientpositive/annotate_stats_select.q
new file mode 100644
index 0000000..93492f5
--- /dev/null
+++ ql/src/test/queries/clientpositive/annotate_stats_select.q
@@ -0,0 +1,140 @@
+create table if not exists alltypes (
+  bo1 boolean,
+  ti1 tinyint,
+  si1 smallint,
+  i1 int,
+  bi1 bigint,
+  f1 float,
+  d1 double,
+  de1 decimal,
+  ts1 timestamp,
+  da1 timestamp,
+  s1 string,
+  m1 map<string,string>,
+  l1 array<int>,
+  st1 struct<c1:int,c2:string>
+) row format delimited fields terminated by '|'
+collection items terminated by ','
+map keys terminated by ':' stored as textfile;
+
+create table alltypes_orc like alltypes;
+alter table alltypes_orc set fileformat orc;
+
+load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes;
+
+insert overwrite table alltypes_orc select * from alltypes;
+
+-- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514
+explain extended select * from alltypes_orc;
+
+-- statistics for complex types are not supported yet
+analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1;
+
+-- numRows: 2 rawDataSize: 1514
+explain extended select * from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 8
+explain extended select bo1 from alltypes_orc;
+
+-- col alias renaming
+-- numRows: 2 rawDataSize: 8
+explain extended select i1 as int1 from alltypes_orc;
+
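+-- the data size of a projection is roughly numRows * size(type); under the 64-bit
+-- Java data model assumed here, int-sized primitives count 4 bytes and long-sized
+-- primitives 8, so the boolean and int projections above cost 2*4 = 8 bytes while
+-- the bigint/double constants below cost 2*8 = 16
+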
+-- numRows: 2 rawDataSize: 174
+explain extended select s1 from alltypes_orc;
+
+-- column statistics for complex types are unsupported, so statistics will not be updated
+-- numRows: 2 rawDataSize: 1514
+explain extended select m1 from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 246
+explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 0
+explain extended select null from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 8
+explain extended select 11 from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 16
+explain extended select 11L from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 16
+explain extended select 11.0 from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 178
+explain extended select "hello" from alltypes_orc;
+explain extended select cast("hello" as char(5)) from alltypes_orc;
+explain extended select cast("hello" as varchar(5)) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 96
+explain extended select unbase64("0xe23") from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 16
+explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 80
+explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 112
+explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 224
+explain extended select cast("58.174" as DECIMAL) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 112
+explain extended select array(1,2,3) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 1508
+explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 112
+explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
+
+-- numRows: 2 rawDataSize: 250
+explain extended select CREATE_UNION(0, "hello") from alltypes_orc;
+
+-- COUNT(*) is projected as a new column; it is not projected as a GenericUDF, so the data size estimate is based on the number of rows
+-- numRows: 1 rawDataSize: 8
+explain extended select count(*) from alltypes_orc;
+
+-- COUNT(1) is projected as a new column; it is not projected as a GenericUDF, so the data size estimate is based on the number of rows
+-- numRows: 1 rawDataSize: 8
+explain extended select count(1) from alltypes_orc;
+
+-- column statistics for complex column types will be missing.
+ +-- column statistics for complex column types will be missing; data size will be calculated from the available column statistics +-- numRows: 2 rawDataSize: 254 +explain extended select *,11 from alltypes_orc; + +-- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp; + +-- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2; + +-- This test is for the FILTER operator where the filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc where bo1; + +-- numRows: 0 rawDataSize: 0 +explain extended select bo1 from alltypes_orc where !bo1; diff --git ql/src/test/queries/clientpositive/annotate_stats_table.q ql/src/test/queries/clientpositive/annotate_stats_table.q new file mode 100644 index 0000000..1c7d163 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_table.q @@ -0,0 +1,52 @@ +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +-- basicStatState: NONE colStatState: NONE +explain extended select * from emp_orc; + +LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; + +set hive.stats.autogather=false; + +insert overwrite table emp_orc select * from emp_staging; + +-- stats are disabled. basic stats will report the file size but not the raw data size, so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from emp_orc;
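The basicStatState transitions exercised in this file follow from which basic statistics are available: nothing at all yields NONE, a known file size without a reliable row count and raw data size (the autogather-disabled insert above) yields PARTIAL, and a subsequent ANALYZE yields COMPLETE. A sketch of that tri-state decision, using an enum of our own rather than Hive's internal representation:

// Tri-state decision sketched from the test expectations in this file; the
// enum and method are ours, not Hive's internal statistics representation.
final class BasicStatsStateSketch {

  enum State { NONE, PARTIAL, COMPLETE }

  static State basicStatsState(boolean haveFileSize, boolean haveRowCountAndRawSize) {
    if (!haveFileSize && !haveRowCountAndRawSize) {
      return State.NONE;     // empty table: never loaded or analyzed
    }
    if (haveFileSize && haveRowCountAndRawSize) {
      return State.COMPLETE; // e.g. after 'analyze table ... compute statistics'
    }
    return State.PARTIAL;    // e.g. autogather disabled: file size only
  }

  public static void main(String[] args) {
    System.out.println(basicStatsState(false, false)); // NONE
    System.out.println(basicStatsState(true, false));  // PARTIAL
    System.out.println(basicStatsState(true, true));   // COMPLETE
  }
}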
+ +-- table level analyze statistics +analyze table emp_orc compute statistics; + +-- basicStatState: COMPLETE colStatState: NONE +explain extended select * from emp_orc; + +-- column level partial statistics +analyze table emp_orc compute statistics for columns deptid; + +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from emp_orc; + +-- all selected columns have statistics +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc; + +-- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid; + +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select * from emp_orc; + +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname from emp_orc; + +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc; + +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname,deptid from emp_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_union.q ql/src/test/queries/clientpositive/annotate_stats_union.q new file mode 100644 index 0000000..726b048 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_union.q @@ -0,0 +1,53 @@ +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 8 rawDataSize: 688 +explain extended select state from loc_orc; + +-- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp; + +create database test; +use test; +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_staging compute statistics; +analyze table loc_staging compute statistics for columns state,locid,zip,year; +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp; + +-- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp; diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 7f86314..bcfe175 100644 --- ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++
ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -112,16 +112,24 @@ STAGE PLANS: alter_coltype TableScan alias: alter_coltype + Statistics: + numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE GatherStats: false Select Operator + Statistics: + numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE Group By Operator aggregations: expr: count() bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -181,16 +189,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -308,16 +322,24 @@ STAGE PLANS: alter_coltype TableScan alias: alter_coltype + Statistics: + numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE GatherStats: false Select Operator + Statistics: + numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: COMPLETE Group By Operator aggregations: expr: count() bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -377,16 +399,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -457,21 +485,31 @@ STAGE PLANS: alter_coltype TableScan alias: alter_coltype + Statistics: + numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ts = 3.0) and (dt = 10)) type: boolean + Statistics: + numRows: 75 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE Select Operator + Statistics: + numRows: 75 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE Group By Operator aggregations: expr: count() bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -617,16 +655,22 @@ STAGE PLANS: 
bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -769,6 +813,8 @@ STAGE PLANS: alter_coltype TableScan alias: alter_coltype + Statistics: + numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -781,11 +827,15 @@ STAGE PLANS: expr: ts type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1008,16 +1058,24 @@ STAGE PLANS: alter_coltype TableScan alias: alter_coltype + Statistics: + numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: COMPLETE GatherStats: false Select Operator + Statistics: + numRows: 75 dataSize: 573 basicStatsState: COMPLETE colStatsState: COMPLETE Group By Operator aggregations: expr: count() bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -1163,16 +1221,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out new file mode 100644 index 0000000..7c1ebd3 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -0,0 +1,2470 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE 
+POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: 
loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + 
+PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + 
Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not equals comparison shouldn't affect number of rows. rawDataSize is 804 and not 796 because of rounding off issue with avgColLen. avgColLen uses integers and not double. +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where state!='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- not equals comparison shouldn't affect number of rows. rawDataSize is 804 and not 796 because of rounding off issue with avgColLen. avgColLen uses integers and not double.
+-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where state!='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct 
loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where state<>'OH' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where state<>'OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<> (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was 
here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where zip is null +PREHOOK: type: QUERY +POSTHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where zip is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is null + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked 
pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where !(zip is not null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where !(zip is not null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
(TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is not null) + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where zip is not null +PREHOOK: type: QUERY +POSTHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where zip is not null +POSTHOOK: type: QUERY 
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is not null + type: boolean + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where !(zip is null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 7 rawDataSize: 702 +explain extended select * from loc_orc where !(zip is null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is null) + type: boolean + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 702 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### 
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where !false +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 804 +explain extended select * from loc_orc where !false +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
false)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not false) + type: boolean + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 804 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! true)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not true) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> 
Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where state='OH' or state='CA' +PREHOOK: type: QUERY +POSTHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where state='OH' or state='CA' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL state) 'OH') (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((state = 'OH') or (state = 'CA')) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + 
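
The plans for `where !false` and `where !true` above bracket the constant-predicate cases for the new annotation rules: a predicate folded to FALSE propagates numRows: 0 / dataSize: 0 from the Filter Operator down, while folded TRUE passes all 8 rows through (note that dataSize is re-derived from column statistics once the rules run, 796 at the scan versus 804 after the Filter). A minimal sketch of that rule, with illustrative names rather than the patch's actual classes:

    // Illustrative sketch only; the class and method names here are
    // hypothetical, not identifiers from this patch.
    public final class ConstantPredicateRule {

      // A predicate folded to FALSE selects nothing; folded TRUE keeps
      // every input row.
      static long estimateRows(long inputRows, boolean foldedPredicate) {
        return foldedPredicate ? inputRows : 0L;
      }

      public static void main(String[] args) {
        System.out.println(estimateRows(8, false)); // where !true  -> numRows: 0
        System.out.println(estimateRows(8, true));  // where !false -> numRows: 8
      }
    }
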
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where year=2001 and year is null +PREHOOK: type: QUERY +POSTHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where year=2001 and year is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and year is null) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### 
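
By this point the plan for state='OH' or state='CA' has shown the disjunction estimate: each equality branch is expected to keep one row, and the OR keeps their sum, 2 of 8 (dataSize 204 = 2 rows at 102 bytes). A sketch consistent with those numbers, assuming the branches are estimated independently and summed with a cap at the input row count; a fuller estimator would also subtract the overlap of the branches, which is empty here because one row cannot equal two different constants:

    // Sketch of the disjunction rule suggested by the OR plan; names are
    // illustrative, not the patch's API.
    public final class OrPredicateRule {

      // numRows(p1 OR p2) ~= numRows(p1) + numRows(p2), never more than
      // the number of input rows.
      static long estimateRows(long inputRows, long leftRows, long rightRows) {
        return Math.min(inputRows, leftRows + rightRows);
      }

      public static void main(String[] args) {
        // state='OH' -> 1 row, state='CA' -> 1 row, so the OR keeps 2 of 8.
        System.out.println(estimateRows(8, 1, 1)); // 2
      }
    }
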
+ Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL state) 'OH')) (= (TOK_TABLE_OR_COL state) 'FL'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and (state = 'OH')) and (state = 'FL')) + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
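
The shorthand "8/2 = 4/2 = 2" in the comment above means the conjuncts are applied in cascade: year=2001 keeps 8/2 = 4 rows, then year is null keeps 4/2 = 2. The 1-row estimate for year=2001 and state='OH' and state='FL' is consistent with the same cascade run one conjunct further (8 -> 4 -> 2 -> 1). A sketch under the assumption that each conjunct contributes an independent selectivity applied to the running count:

    // Cascading conjunction sketch; the selectivity values are assumptions
    // for illustration, not figures computed by the patch.
    public final class AndPredicateRule {

      static long estimateRows(long inputRows, double... selectivities) {
        long rows = inputRows;
        for (double s : selectivities) {
          rows = (long) Math.ceil(rows * s); // apply each conjunct in turn
        }
        return rows;
      }

      public static void main(String[] args) {
        // year=2001 AND year IS NULL: 8 * 1/2 * 1/2 = 2
        System.out.println(estimateRows(8, 0.5, 0.5));      // 2
        // year=2001 AND state='OH' AND state='FL': 8 -> 4 -> 2 -> 1
        System.out.println(estimateRows(8, 0.5, 0.5, 0.5)); // 1
      }
    }
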
NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 1 row and right will yield 1 row +-- numRows: 3 rawDataSize: 306 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. 
left expr will yield 1 row and right will yield 1 row +-- numRows: 3 rawDataSize: 306 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and year is null) or (state = 'CA')) + type: boolean + Statistics: + numRows: 3 dataSize: 306 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 3 dataSize: 306 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 3 dataSize: 306 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + 
columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 102 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (or (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) or year is null) and (state = 'CA')) + type: boolean + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 102 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here 
#### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- all inequality conditions rows/3 is the rules +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where locid < 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- all inequality conditions rows/3 is the rules +-- numRows: 2 rawDataSize: 204 +explain extended select * from loc_orc where locid < 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid < 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was 
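
The comment "all inequality conditions rows/3 is the rules" describes the heuristic visible in the four plans for locid <, >, <= and >= 30: each keeps numRows/3 of the input, 8/3 = 2 with integer division, with dataSize following at 102 bytes per surviving row (204). A one-line sketch of that heuristic:

    // Inequality heuristic named in the comment above; a sketch, not the
    // patch's actual rule class.
    public final class InequalityRule {

      // <, >, <= and >= are all assumed to keep about a third of the rows.
      static long estimateRows(long inputRows) {
        return inputRows / 3;
      }

      public static void main(String[] args) {
        System.out.println(estimateRows(8)); // locid < 30 (and >, <=, >=) -> 2
      }
    }
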
here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid > 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid > 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid > 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string 
+ expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid <= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid <= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE 
PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid <= 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid >= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid >= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (>= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid >= 30) + type: boolean + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 204 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff 
--git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out new file mode 100644 index 0000000..ee8e557 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -0,0 +1,1750 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + 
expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side GBY: numRows: 8 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 +explain extended select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side GBY: numRows: 8 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 +explain extended select year from loc_orc group by year +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: year + type: int + outputColumnNames: year + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: year + type: int + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 28 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + Statistics: + numRows: 8 dataSize: 28 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY: numRows: 8 +-- reduce-side GBY: numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY: numRows: 8 +-- reduce-side GBY: numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPBY (TOK_TABLE_OR_COL state) 
(TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + mode: hash + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
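
The two group-by plans above show both halves of the new GROUP BY estimation. On the map side the row count stays at 8: with hive.stats.map.parallelism at its default of 1, no map-side reduction is assumed. On the reduce side, group by year yields 2 rows (one distinct value plus the NULL bucket) and group by state, locid yields 4. One reading consistent with all the numbers in this file, though not necessarily the patch's exact formula, is min(product of grouping-key NDVs, half the incoming rows); the NDV inputs in the sketch below are read off loc.txt and are assumptions:

    // Reduce-side GROUP BY sketch; the formula and NDV inputs are inferred
    // from the plans above, not taken from the patch's code.
    public final class GroupByReduceRule {

      static long estimateRows(long mapSideRows, long... keyNdvs) {
        long product = 1;
        for (long ndv : keyNdvs) {
          product *= ndv;
        }
        return Math.min(product, mapSideRows / 2); // cap at half the input
      }

      public static void main(String[] args) {
        System.out.println(estimateRows(8, 2));    // group by year         -> 2
        System.out.println(estimateRows(8, 6, 5)); // group by state, locid -> 4 (capped)
      }
    }
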
properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 2800 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid with rollup +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + 
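
The cube plan above and the rollup plan that follows show the grouping-set multiplier: the map-side Group By Operator emits one row per input row per grouping set. cube(state, locid) expands to 2^2 = 4 sets, so 8 × 4 = 32 map-side rows; rollup expands to 3 sets, 8 × 3 = 24; the later grouping sets((state)) query has a single set and stays at 8. The reduce side then halves each (16, 12 and 4), matching the cap in the previous sketch. The multiplier itself:

    // Grouping-set multiplier sketch, matching the map-side numbers in the
    // cube, rollup and grouping sets plans.
    public final class GroupingSetRule {

      static long mapSideRows(long inputRows, int numGroupingSets) {
        return inputRows * numGroupingSets;
      }

      public static void main(String[] args) {
        System.out.println(mapSideRows(8, 4)); // cube(state, locid)     -> 32
        System.out.println(mapSideRows(8, 3)); // rollup(state, locid)   -> 24
        System.out.println(mapSideRows(8, 1)); // grouping sets((state)) -> 8
      }
    }
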
outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 12 dataSize: 2100 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + 
columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 4 dataSize: 700 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + 
outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 1400 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
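The numRows annotations in these GROUP BY plans follow one pattern: the map-side Group By Operator is estimated at (input rows) * (number of grouping sets), so a cube over two keys expands 8 rows to 32, a rollup to 24, and a single grouping set leaves 8 unchanged, while the reduce-side Group By is estimated as half the map-side rows, capped by the product of the grouping keys' distinct-value counts. The later queries in this file also scale the map-side estimate by what appears to be a map-side parallelism factor of 10, which is how 8 input rows become 80 and 320. A minimal sketch of that arithmetic, in plain Java with hypothetical names rather than Hive's actual classes:

public class GroupByRowEstimate {
    // Map-side GBY: every input row is emitted once per grouping set,
    // scaled by the assumed number of parallel map tasks.
    static long mapSideRows(long inputRows, int groupingSets, int mapParallelism) {
        return inputRows * groupingSets * mapParallelism;
    }

    // Reduce-side GBY: assume half the map-side rows survive aggregation,
    // but never more than the product of the keys' distinct-value counts.
    static long reduceSideRows(long mapSideRows, long ndvProduct) {
        return Math.min(mapSideRows / 2, ndvProduct);
    }

    public static void main(String[] args) {
        // cube(state, locid) = 4 grouping sets: 8 -> 32 map-side,
        // then min(32/2, 6*7) = 16 reduce-side, as annotated in these plans
        System.out.println(mapSideRows(8, 4, 1) + " " + reduceSideRows(32, 42));
        // group by year at parallelism 10: 8 -> 80 map-side,
        // then min(80/2, ndv(year) = 2) = 2 reduce-side, as in the later plan
        System.out.println(mapSideRows(8, 1, 10) + " " + reduceSideRows(80, 2));
    }
}

The same two formulas reproduce every pair in this file: min(24/2, 42) = 12 for the rollup, min(8/2, 42) = 4 for the single grouping set, and min(320/2, 42) = 42 for the cube at parallelism 10.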
TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 24 dataSize: 2388 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE 
true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 12 dataSize: 2100 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 1080 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + 
Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 32 dataSize: 3184 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 2800 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1440 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ 
+ hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain extended select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain extended select year from loc_orc group by year +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: year + type: int + outputColumnNames: year + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: year + type: int + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 80 dataSize: 280 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + Statistics: + numRows: 80 dataSize: 280 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was 
here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) +explain extended select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. 
numRows = min(320/2, 6*7) +explain extended select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 320 dataSize: 31840 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 320 dataSize: 31840 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: 
default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 42 dataSize: 7350 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 42 dataSize: 3780 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 42 dataSize: 3780 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out new file mode 100644 index 0000000..f705f31 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -0,0 +1,1813 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists dept_staging ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_staging ( + deptid int, + deptname string +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_staging +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: 
create table if not exists dept_orc like dept_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_orc like dept_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_orc +PREHOOK: query: alter table dept_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@dept_orc +PREHOOK: Output: default@dept_orc +POSTHOOK: query: alter table dept_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@dept_orc +POSTHOOK: Output: default@dept_orc +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@dept_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dept_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: insert overwrite table dept_orc select * from dept_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_staging +PREHOOK: Output: default@dept_orc +POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_staging +POSTHOOK: Output: default@dept_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging 
+PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] 
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- number of rows +-- emp_orc - 6 +-- dept_orc - 4 +-- loc_orc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than number of rows) +-- emp_orc.deptid - 3 +-- emp_orc.lastname - 7 +-- dept_orc.deptid - 6 +-- dept_orc.deptname - 5 +-- loc_orc.locid - 6 +-- loc_orc.state - 7 + +-- Expected output rows: 4 +-- Reason: #rows = (6*4)/max(3,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- number of rows +-- emp_orc - 6 +-- dept_orc - 4 +-- loc_orc - 8 + +-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than number of rows) +-- emp_orc.deptid - 3 +-- emp_orc.lastname - 7 +-- dept_orc.deptid - 6 +-- dept_orc.deptname - 5 +-- loc_orc.locid - 6 +-- loc_orc.state - 7 + +-- Expected output rows: 4 +-- Reason: #rows = (6*4)/max(3,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
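The "Reason" comments above encode the standard equi-join estimate: output rows = (product of the input cardinalities) / (the larger of the two join-key distinct-value counts), here (6*4)/max(3,6) = 4. A minimal sketch of that formula, as illustrative Java (class and method names are hypothetical, not Hive's API):

public class JoinRowEstimate {
    // Two-way equi-join estimate under the containment assumption:
    // |R JOIN S| = |R| * |S| / max(ndv(R.key), ndv(S.key)).
    static long joinRows(long rowsR, long ndvR, long rowsS, long ndvS) {
        return (rowsR * rowsS) / Math.max(ndvR, ndvS);
    }

    public static void main(String[] args) {
        // emp_orc (6 rows, ndv(deptid) = 3) join dept_orc (4 rows, ndv(deptid) = 6)
        System.out.println(joinRows(6, 3, 4, 6)); // prints 4
    }
}

Dividing by the larger NDV is the usual containment assumption: the key values on the side with fewer distinct keys are assumed to all appear on the side with more.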
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 
deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 760 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- 3 way join +-- Expected output rows: 4 +-- Reason: #rows = (6*4*6)/(max(3,6)*max(6,3)) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 way join +-- Expected output rows: 4 +-- Reason: #rows = (6*4*6)/(max(3,6)*max(6,3)) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME emp_orc) e1) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL e1) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + e1 + TableScan + alias: e1 + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e1, e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 + Statistics: + numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: + numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 1136 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types string:int:int:string:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- Expected output rows: 5 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) +PREHOOK: type: QUERY +POSTHOOK: query: -- Expected output rows: 5 +-- Reason: #rows = 
(6*4*8)/max(3,6)*max(6,6) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + l + TableScan + alias: l + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + sort order: + + Map-reduce partition columns: + expr: locid + type: int + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + 
field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct 
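For the emp/dept/loc plan continuing below, the same shorthand gives (6*4*8)/(max(3,6)*max(6,6)) = 192/36, about 5.33, truncated to the numRows: 5 the Join Operator reports. With the illustrative estimate() sketched earlier:

    // (6*4*8) / (max(3,6) * max(6,6)) = 192 / 36, truncated to 5
    long rows = JoinRowEstimate.estimate(new long[]{6, 4, 8}, new long[]{6, 6}); // 5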
loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + /loc_orc [l] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 + Statistics: + numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: int + expr: _col10 + type: bigint + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 5 dataSize: 1449 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:int:int:string:string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- join keys of different types +-- Expected output rows: 4 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) +PREHOOK: type: QUERY +POSTHOOK: query: -- join keys of different types +-- Expected output rows: 4 +-- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) state)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: UDFToDouble(deptid) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(deptid) + type: double + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: UDFToDouble(deptid) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(deptid) + type: double + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + l + TableScan + alias: l + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: UDFToDouble(state) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(state) + type: double + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: 
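When the key types differ (int deptid vs. string state), both sides are first cast with UDFToDouble, as the Reduce Output Operators below show, and the estimate then divides by the max NDV of the cast keys: (6*4*8)/(max(3,6)*max(6,7)) = 192/42, about 4.57, truncated to 4. With the illustrative helper sketched earlier:

    // (6*4*8) / (max(3,6) * max(6,7)) = 192 / 42, truncated to 4
    long rows = JoinRowEstimate.estimate(new long[]{6, 4, 8}, new long[]{6, 7}); // 4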
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + /loc_orc [l] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 + Statistics: + numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: 
string + expr: _col8 + type: string + expr: _col9 + type: int + expr: _col10 + type: bigint + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 1156 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:int:int:string:string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4)/max(3,6)*max(7,5) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) +PREHOOK: type: QUERY +POSTHOOK: query: -- multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4)/max(3,6)*max(7,5) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. 
(TOK_TABLE_OR_COL d) deptname))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: deptname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: deptname + type: string + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: lastname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: lastname + type: string + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
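In the multi-attribute case each join-key pair contributes its own max-NDV divisor, so (6*4)/(max(3,6)*max(7,5)) = 24/42 truncates to the numRows: 0 seen in the Join Operator below. With the illustrative helper sketched earlier:

    // (6*4) / (max(3,6) * max(7,5)) = 24 / 42, truncated to 0
    long rows = JoinRowEstimate.estimate(new long[]{6, 4}, new long[]{6, 7}); // 0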
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- 3 way and multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) +PREHOOK: type: QUERY +POSTHOOK: query: -- 3 way and multi-attribute join +-- Expected output rows: 0 +-- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year 
SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL d) deptname)))) (TOK_TABREF (TOK_TABNAME loc_orc) l) (and (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL l) locid)) (= (. (TOK_TABLE_OR_COL e) lastname) (. (TOK_TABLE_OR_COL l) state))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: deptname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: deptname + type: string + Statistics: + numRows: 4 dataSize: 384 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptid + type: int + expr: deptname + type: string + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + expr: lastname + type: string + sort order: ++ + Map-reduce partition columns: + expr: deptid + type: int + expr: lastname + type: string + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + l + TableScan + alias: l + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + expr: state + type: string + sort order: ++ + Map-reduce partition columns: + expr: locid + type: int + expr: state + type: string + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 2 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns deptid,deptname + columns.types 
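Combining both patterns, the three-way multi-attribute join divides by four max-NDV terms, one per key pair per join step: (6*4*8)/(max(3,6)*max(7,5)*max(3,6)*max(7,7)) = 192/1764, truncated to 0, matching the Join Operator below. With the illustrative helper sketched earlier:

    // (6*4*8) / (6*7*6*7) = 192 / 1764, truncated to 0
    long rows = JoinRowEstimate.estimate(new long[]{6, 4, 8}, new long[]{6, 7, 6, 7}); // 0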
int:string + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numRows 4 + rawDataSize 384 + serialization.ddl struct dept_orc { i32 deptid, string deptname} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 329 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + /loc_orc [l] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {VALUE._col0} {VALUE._col1} 
{VALUE._col2} {VALUE._col3} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: int + expr: _col5 + type: string + expr: _col8 + type: string + expr: _col9 + type: int + expr: _col10 + type: bigint + expr: _col11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types string:int:int:string:string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out new file mode 100644 index 0000000..c6a446c --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -0,0 +1,237 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: 
Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 4))) + +STAGE DEPENDENCIES: + 
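The three LIMIT plans that follow exhibit a simple rule: the row count is clamped to the limit, and dataSize is rescaled by a floored average row width, so limit 4 over 8 rows of 796 bytes gives 796/8 = 99 bytes per row and 4*99 = 396, while limit 16 leaves the stats untouched and limit 0 zeroes them. A rough sketch under those assumptions (not the patch's actual API):

    public class LimitStats {
      // Returns {numRows, dataSize} after a LIMIT, mirroring the plans below.
      static long[] apply(long inRows, long inDataSize, long limit) {
        if (limit >= inRows) {
          return new long[] {inRows, inDataSize};   // limit 16 over 8 rows: unchanged
        }
        long avgRowSize = inRows > 0 ? inDataSize / inRows : 0;  // 796 / 8 = 99 (floored)
        return new long[] {limit, limit * avgRowSize};  // limit 4 -> {4, 396}; limit 0 -> {0, 0}
      }

      public static void main(String[] args) {
        long[] s = apply(8, 796, 4);
        System.out.println(s[0] + " " + s[1]);  // 4 396
      }
    }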
Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +PREHOOK: type: QUERY +POSTHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 16))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 16 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 0))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + 
expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out new file mode 100644 index 0000000..914a114 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -0,0 +1,1764 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, 
comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: 
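The PARTIAL state the comment above describes is one value of a three-valued summary of basic stats: totalSize is known from the files on disk, but numRows and rawDataSize are -1 because stats collection was disabled during the insert. A hedged sketch of how such per-partition states might combine (the enum and merge rule here are illustrative, not the patch's code):

    public class StatsState {
      enum State { NONE, PARTIAL, COMPLETE }

      // Combining per-partition states: any mixture degrades to PARTIAL.
      static State merge(State a, State b) {
        return a == b ? a : State.PARTIAL;
      }

      public static void main(String[] args) {
        // One analyzed partition (COMPLETE) plus one unanalyzed (PARTIAL):
        System.out.println(merge(State.COMPLETE, State.PARTIAL));  // PARTIAL
      }
    }

This is consistent with the plans that follow: analyzing only partition year=2001 leaves the default partition, and therefore any scan that includes it, at PARTIAL.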
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level analyze statistics for specific parition +analyze table loc_orc partition(year=2001) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: query: -- partition level analyze statistics for specific parition +analyze table loc_orc partition(year=2001) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc where 
year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 325 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 325 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE false + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows -1 + partition_columns year + rawDataSize -1 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 727 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + 
expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + 
+STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + 
COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc 
+ name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and (year = '__HIVE_DEFAULT_PARTITION__')) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, 
comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select zip from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select zip from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: zip + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + 
bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select state,locid from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 720 
basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
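The two plans above show the folding rule the new annotation applies across partitions: colStatsState stays PARTIAL while at least one selected partition (here year=__HIVE_DEFAULT_PARTITION__) lacks column statistics, and it only becomes COMPLETE once every selected partition has them. A minimal, self-contained Java sketch of that rule follows; the class and method names are invented for illustration and are not the actual implementation in this patch:

import java.util.Arrays;
import java.util.List;

public class StatsStateDemo {

  // The three states printed in the plans above.
  enum State { NONE, PARTIAL, COMPLETE }

  // Fold per-partition states into one: COMPLETE only if every partition
  // is COMPLETE, NONE only if every partition is NONE, otherwise PARTIAL.
  static State combine(List<State> states) {
    if (states.isEmpty()) {
      return State.NONE;
    }
    boolean allComplete = true;
    boolean allNone = true;
    for (State s : states) {
      allComplete &= (s == State.COMPLETE);
      allNone &= (s == State.NONE);
    }
    return allComplete ? State.COMPLETE : (allNone ? State.NONE : State.PARTIAL);
  }

  public static void main(String[] args) {
    // year=2001 has column stats, __HIVE_DEFAULT_PARTITION__ does not -> PARTIAL
    System.out.println(combine(Arrays.asList(State.COMPLETE, State.NONE)));
    // all selected partitions analyzed for the queried columns -> COMPLETE
    System.out.println(combine(Arrays.asList(State.COMPLETE, State.COMPLETE)));
  }
}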
+ +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 402 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (!= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (year <> 2001) + type: boolean + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 325 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + 
numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 402 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 325 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 727 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out new file mode 100644 index 0000000..17311da --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -0,0 +1,4646 @@ +PREHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map<string,string>, + l1 array<int>, + st1 struct<c1:int,c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1
timestamp, + da1 timestamp, + s1 string, + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table alltypes_orc like alltypes +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alltypes_orc like alltypes +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes +PREHOOK: type: LOAD +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '../../data/files/alltypes.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +POSTHOOK: Output: default@alltypes +PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes +POSTHOOK: Output: default@alltypes_orc +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, 
comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select 
Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, 
comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) int1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 
+ properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 174 +explain extended select s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 174 +explain extended select s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: s1 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: m1 + type: map + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) 
de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 246 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 246 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1)) (TOK_SELEXPR (TOK_TABLE_OR_COL ti1)) (TOK_SELEXPR (TOK_TABLE_OR_COL si1)) 
(TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR (TOK_TABLE_OR_COL bi1)) (TOK_SELEXPR (TOK_TABLE_OR_COL f1)) (TOK_SELEXPR (TOK_TABLE_OR_COL d1)) (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: s1 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types boolean:tinyint:smallint:int:bigint:float:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + 
serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_NULL)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: null + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + 
serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11L)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + 
hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11.0)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11.0 + type: double + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 'hello' + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: 
COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select cast("hello" as char(5)) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select cast("hello" as char(5)) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION (TOK_CHAR 5) "hello"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( 'hello' AS CHAR(5)) + type: char(5) + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types char(5) + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select cast("hello" as varchar(5)) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select cast("hello" as varchar(5)) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION (TOK_VARCHAR 5) "hello"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( 'hello' AS varchar(5)) + type: varchar(5) + outputColumnNames: 
_col0 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types varchar(5) + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: unbase64('0xe23') + type: binary + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 96 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 96 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types binary + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 
bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TINYINT "1")) (TOK_SELEXPR 
(TOK_FUNCTION TOK_SMALLINT "20"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: UDFToByte('1') + type: tinyint + expr: UDFToShort('20') + type: smallint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types tinyint:smallint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 
80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS TIMESTAMP) + type: timestamp + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types timestamp + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, 
type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DATE "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS DATE) + type: date + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types date + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 
i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DECIMAL "58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '58.174' AS decimal(10,0)) + type: decimal(10,0) + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 224 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 224 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + 
columns.types decimal(10,0) + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), 
] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION array 1 2 3))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types array + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns 
bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map "a=1 b=2 c=3" " " "="))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: str_to_map('a=1 b=2 c=3',' ','=') + type: map + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator 
+ compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: 
alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NAMED_STRUCT "a" 11 "b" 11))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: named_struct('a',11,'b',11) + type: struct + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp 
da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CREATE_UNION 0 "hello"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map 
Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: create_union(0,'hello') + type: uniontype + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types uniontype + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- COUNT(*) is projected as new column. 
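
(Editor's note on the two constant-projection plans above: the per-row widths the annotation assigns to constant complex-typed expressions follow directly from dataSize/numRows — named_struct('a',11,'b',11) is costed at 112/2 = 56 bytes per row and create_union(0,'hello') at 250/2 = 125, i.e. type-based defaults rather than observed data. A minimal sketch of that back-of-the-envelope check; the class and method names are hypothetical and assume nothing about Hive's internal stats APIs.)

    // Hypothetical helper, not part of Hive: recovers the per-row width
    // implied by a Statistics annotation such as the Select Operator lines above.
    public final class PerRowWidth {
        static long perRow(long dataSize, long numRows) {
            if (numRows <= 0) {
                throw new IllegalArgumentException("numRows must be positive");
            }
            return dataSize / numRows;
        }

        public static void main(String[] args) {
            // Figures taken verbatim from the annotated plans above.
            System.out.println(perRow(112, 2)); // 56  -> struct<a:int,b:int> constant
            System.out.println(perRow(250, 2)); // 125 -> uniontype<int,string> constant
        }
    }
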
It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(*) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(*) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(1) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(1) is projected as new column. 
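
(Editor's note on the count estimates: both the count(*) plan above and the count(1) plan that follows annotate every post-aggregation operator with numRows: 1, dataSize: 8 — an aggregation with no grouping keys emits a single bigint row of 8 bytes regardless of input cardinality. The sketch below illustrates that rule under the assumption that one row is produced per map-side aggregator, with parallelism 1 in these single-task plans; none of these names are real Hive APIs.)

    // Illustrative only: estimates GROUP BY output rows the way the
    // annotations above behave, not via any actual Hive class.
    public final class GroupByEstimate {
        /**
         * @param inputRows    rows flowing into the aggregation
         * @param groupingKeys number of GROUP BY keys (0 for plain count(*))
         * @param parallelism  assumed number of map-side aggregators
         */
        static long outputRows(long inputRows, int groupingKeys, int parallelism) {
            if (groupingKeys == 0) {
                // No keys: each aggregator emits exactly one row.
                return parallelism;
            }
            // With keys, the input row count is the worst-case bound
            // (every row a distinct group).
            return inputRows;
        }

        public static void main(String[] args) {
            long rows = outputRows(2, 0, 1); // 1, as annotated above
            long bytes = rows * 8;           // single bigint column -> dataSize 8
            System.out.println(rows + " row(s), " + bytes + " byte(s)");
        }
    }
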
It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 8 +explain extended select count(1) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc 
{ bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for complex column types will be missing. data size will be calculated from available column statistics +-- numRows: 2 rawDataSize: 254 +explain extended select *,11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for complex column types will be missing. 
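
(Editor's note on the PARTIAL case: the select *,11 plan below is the one place in this output where colStatsState degrades to PARTIAL — the map, array and struct columns carry no column statistics, so the row width must be assembled from the columns that do have stats plus a fallback width for the rest. The per-column byte costs are recoverable from the subquery plans further down: 8/2 = 4 per int, 16/2 = 8 per double, and 186/2 − 4 = 89 for the constant string 'hello'. The sketch below shows the assembly pattern only; DEFAULT_WIDTH = 100 is an assumed fallback in the spirit of Hive's hive.stats.max.variable.length knob, and nothing here is an actual Hive API.)

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.OptionalLong;

    // Hypothetical row-width assembly: sum the widths known from column
    // statistics and substitute a configured default where stats are
    // missing (the complex-typed columns here). Not Hive code; just the
    // arithmetic pattern behind a PARTIAL colStatsState annotation.
    public final class PartialRowWidth {
        static final long DEFAULT_WIDTH = 100; // assumed fallback width

        static long rowWidth(Map<String, OptionalLong> columnWidths) {
            long total = 0;
            for (OptionalLong w : columnWidths.values()) {
                total += w.orElse(DEFAULT_WIDTH); // missing stats -> fallback
            }
            return total;
        }

        public static void main(String[] args) {
            Map<String, OptionalLong> cols = new LinkedHashMap<>();
            cols.put("i1", OptionalLong.of(4));   // int, width known from stats
            cols.put("d1", OptionalLong.of(8));   // double, width known from stats
            cols.put("m1", OptionalLong.empty()); // map: no column stats
            System.out.println(rowWidth(cols));   // 4 + 8 + 100 = 112
        }
    }
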
data size will be calculated from available column statistics +-- numRows: 2 rawDataSize: 254 +explain extended select *,11 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + expr: 11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: + numRows: 2 dataSize: 254 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 254 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14 + columns.types 
boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1 from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery selects +-- inner select - numRows: 2 rawDataSize: 8 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1 from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns 
bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 8 +explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 11 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct 
st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 16 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, 
type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 11 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + 
Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + expr: 'hello' + type: string + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 24 +-- outer select - numRows: 2 rawDataSize: 16 +explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR 11.0 x)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))))) + +STAGE DEPENDENCIES: + Stage-1 
is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 11.0 + type: double + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: double + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 24 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col1 + type: double + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE 
colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 104 +-- outer select - numRows: 2 rawDataSize: 186 +explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) x) (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23") ub)) (TOK_LIMIT 10))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: unbase64('0xe23') + type: binary + 
outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: binary + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [temp:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 104 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + expr: 'hello' + type: string + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true 
+ serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 +PREHOOK: type: QUERY +POSTHOOK: query: -- inner select - numRows: 2 rawDataSize: 186 +-- middle select - numRows: 2 rawDataSize: 178 +-- outer select - numRows: 2 rawDataSize: 194 +explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR "hello" hell)) (TOK_LIMIT 10))) in1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL hell) h)) (TOK_LIMIT 10))) in2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL h)) (TOK_SELEXPR 11.0)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + in2:in1:alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + expr: 'hello' + type: string + outputColumnNames: _col0, 
_col1 + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: int + expr: _col1 + type: string + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [in2:in1:alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 186 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + GatherStats: false + Reduce Output Operator + sort order: + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: string + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Limit + Statistics: + numRows: 2 dataSize: 178 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: 11.0 + type: double + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 194 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 194 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- This test is for FILTER operator where filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc where bo1 +PREHOOK: type: QUERY +POSTHOOK: query: -- This test is for FILTER operator where filter expression is a boolean column +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc where bo1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))) (TOK_WHERE (TOK_TABLE_OR_COL bo1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: bo1 + type: boolean + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, 
timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select bo1 from alltypes_orc where !bo1 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select bo1 from alltypes_orc where !bo1 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))) (TOK_WHERE (! 
(TOK_TABLE_OR_COL bo1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1064 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not bo1) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numRows 2 + rawDataSize 1064 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1350 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + 
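Editor's note on the two filter plans: they show why column-level stats matter for selectivity. alltypes_orc contains two rows, both with bo1 = true, so `where bo1` keeps numRows at 2 while `where !bo1` drops it to 0 (and dataSize with it). A plausible sketch of that rule follows, with hypothetical names; with complete column stats, the true/false counts of a boolean column give an exact answer.

// Hypothetical sketch of boolean-predicate row estimation; the real rule is
// part of the stats-annotation pass this patch adds.
public class BooleanFilterSketch {
  static long rowsAfterBooleanFilter(long numTrues, long numFalses, boolean negated) {
    // `bo1` keeps the rows where the column is true; `!bo1` keeps the false rows.
    return negated ? numFalses : numTrues;
  }

  public static void main(String[] args) {
    // alltypes.txt loads two rows, both with bo1 = true
    System.out.println(rowsAfterBooleanFilter(2, 0, false)); // 2 -> matches `where bo1`
    System.out.println(rowsAfterBooleanFilter(2, 0, true));  // 0 -> matches `where !bo1`
  }
}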
Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out new file mode 100644 index 0000000..64e45c7 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -0,0 +1,698 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 0 dataSize: 349 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 349 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: 
COMPLETE colStatState: PARTIAL +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: deptid + type: int + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | 
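Editor's note: the emp_orc walkthrough above traces the state machine behind basicStatsState and colStatsState. With stats gathering disabled, only the file size is known (PARTIAL); `analyze table ... compute statistics` supplies numRows (COMPLETE); and colStatsState stays PARTIAL until every column the query touches has been analyzed, which is why `select deptid` reports COMPLETE while `select *` does not. A rough sketch of that bookkeeping, under assumed rules:

// Hypothetical sketch of the NONE/PARTIAL/COMPLETE bookkeeping shown in these
// plans; the thresholds below are inferred from the golden output, not quoted
// from Hive source.
public class StatsStateSketch {
  enum State { NONE, PARTIAL, COMPLETE }

  // Basic stats: file size alone is PARTIAL; a row count makes it COMPLETE.
  static State basicState(boolean haveFileSize, boolean haveRowCount) {
    if (haveRowCount) return State.COMPLETE;
    return haveFileSize ? State.PARTIAL : State.NONE;
  }

  // Column stats: COMPLETE only if every column referenced by the query is analyzed.
  static State colState(int analyzedCols, int referencedCols) {
    if (analyzedCols == 0) return State.NONE;
    return analyzedCols >= referencedCols ? State.COMPLETE : State.PARTIAL;
  }

  public static void main(String[] args) {
    System.out.println(basicState(true, false)); // PARTIAL: size known, rows not
    System.out.println(colState(1, 2));          // PARTIAL: only deptid analyzed, select *
    System.out.println(colState(1, 1));          // COMPLETE: select deptid only
  }
}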
+#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY 
(TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL lastname))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 546 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 546 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR 
TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: deptid + type: int + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname,deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE colStatState: COMPLETE +explain extended select lastname,deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL lastname)) (TOK_SELEXPR 
(TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 349 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 566 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 566 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 349 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out new file mode 100644 index 0000000..03235e6 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -0,0 +1,1127 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 688 +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 688 +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map 
Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY 
(TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat 
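Editor's note: for UNION ALL the annotation is plain addition over the branches. Two 8-row, 688-byte children yield the 16-row, 1376-byte Union operator shown above (and 1592 bytes in the select-* variant that follows). A minimal sketch:

// Minimal sketch, assuming a UNION's statistics are the sum of its children,
// mirroring 8 + 8 rows = 16 and 688 + 688 bytes = 1376 in the plan above.
public class UnionStatsSketch {
  static long[] mergeUnion(long[][] children) { // each child: {numRows, dataSize}
    long rows = 0, size = 0;
    for (long[] c : children) {
      rows += c[0];
      size += c[1];
    }
    return new long[] { rows, size };
  }

  public static void main(String[] args) {
    long[] merged = mergeUnion(new long[][] { {8, 688}, {8, 688} });
    System.out.println(merged[0] + " rows, " + merged[1] + " bytes"); // 16 rows, 1376 bytes
  }
}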
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, 
type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
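[Annotation note, before the remaining plans: projecting all four columns keeps each union branch at the table's rawDataSize (796, so 1592 combined), while the earlier single-column plans dropped each branch to 688, i.e. 8 rows times an average of 86 bytes for the state column. A hedged sketch of that per-column recomputation follows; the 86-byte figure is simply back-solved as 688 / 8 from these outputs, not a documented constant.]

// Recomputing a projection's data size from column statistics once they are
// COMPLETE; all sizes below come from the plans in this file.
public final class ProjectionSizeSketch {
  public static void main(String[] args) {
    long numRows = 8;
    long avgStateSize = 86;                    // assumed avg size of 'state'
    long stateOnly = numRows * avgStateSize;   // 8 * 86 = 688, as annotated
    long allColumns = 796;                     // rawDataSize carried for *
    System.out.println(stateOnly + " vs " + allColumns);
  }
}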
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: create database test +PREHOOK: type: CREATEDATABASE +POSTHOOK: query: create database test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: use test +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: use test +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if 
not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: test@loc_orc +PREHOOK: Output: test@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: test@loc_orc +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: test@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +PREHOOK: Output: test@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_staging compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +PREHOOK: Output: test@loc_staging +POSTHOOK: query: analyze table loc_staging compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: 
Input: test@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:temp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + 
numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:temp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### 
A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: test.loc_orc + name: test.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:temp-subquery1:loc_orc] + /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1376 +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_staging))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 
TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:temp-subquery1:loc_staging + TableScan + alias: loc_staging + Statistics: + numRows: 8 dataSize: 117 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:temp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + 
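[Annotation note: the TableScan figures above line up with the Partition blocks that follow — loc_staging annotates dataSize 117 (its rawDataSize, not its 125-byte totalSize) and loc_orc annotates 796. A small Java sketch of that assumed mapping from metastore parameters; the preference for rawDataSize over totalSize is inferred from these outputs rather than taken from Hive's sources.]

import java.util.Map;

// Assumed derivation of a TableScan annotation from the table/partition
// parameters printed in the Partition blocks (numRows, rawDataSize, totalSize).
public final class TableScanStatsSketch {
  static long[] basicStats(Map<String, String> params) {
    long numRows = parse(params, "numRows");
    long rawDataSize = parse(params, "rawDataSize");
    long totalSize = parse(params, "totalSize");
    // Prefer the logical rawDataSize; fall back to the on-disk totalSize
    // when rawDataSize was never collected.
    long dataSize = rawDataSize > 0 ? rawDataSize : totalSize;
    return new long[] { numRows, dataSize };
  }

  static long parse(Map<String, String> params, String key) {
    return Long.parseLong(params.getOrDefault(key, "0"));
  }
}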
serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_orc + numFiles 1 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 489 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: test.loc_orc + name: test.loc_orc +#### A masked pattern was here #### + Partition + base file name: loc_staging + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_staging + numFiles 1 + numRows 8 + rawDataSize 117 + serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name test.loc_staging + numFiles 1 + numRows 8 + rawDataSize 117 + serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: test.loc_staging + name: test.loc_staging + Truncated Path -> Alias: + /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc] + /test.db/loc_staging [null-subquery1:temp-subquery1:loc_staging] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index 7442b75..19c446b 100644 --- ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -111,6 +111,8 @@ STAGE PLANS: dim_pay_date TableScan alias: dim_pay_date + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -120,6 +122,8 @@ STAGE PLANS: Map-reduce partition columns: expr: date type: string + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: date @@ -127,6 +131,8 @@ STAGE PLANS: orderpayment TableScan alias: orderpayment + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ 
-136,6 +142,8 @@ STAGE PLANS: Map-reduce partition columns: expr: date type: string + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: dealid @@ -244,6 +252,8 @@ STAGE PLANS: deal TableScan alias: deal + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -253,6 +263,8 @@ STAGE PLANS: Map-reduce partition columns: expr: dealid type: int + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: dealid @@ -378,6 +390,8 @@ STAGE PLANS: order_city TableScan alias: order_city + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -387,6 +401,8 @@ STAGE PLANS: Map-reduce partition columns: expr: cityid type: int + Statistics: + numRows: 1 dataSize: 36 basicStatsState: COMPLETE colStatsState: NONE tag: 1 Path -> Alias: #### A masked pattern was here #### @@ -507,6 +523,8 @@ STAGE PLANS: user TableScan alias: user + Statistics: + numRows: 100 dataSize: 288 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -516,6 +534,8 @@ STAGE PLANS: Map-reduce partition columns: expr: userid type: int + Statistics: + numRows: 100 dataSize: 288 basicStatsState: COMPLETE colStatsState: NONE tag: 1 Path -> Alias: #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index f347500..cdec74e 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -97,6 +97,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -290,6 +292,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -1070,6 +1074,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out index 7568371..ad61ec5 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_11.q.out @@ -141,6 +141,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 114 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -158,6 +160,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -358,7 +362,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big @@ -450,6 +453,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 114 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator 
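[Annotation note: the sort-merge and bucket join outputs in this patch show all three basicStatsState patterns — numRows: 0 with a known dataSize is flagged PARTIAL, both values known is COMPLETE (e.g. numRows: 1 dataSize: 36 above), and neither known (as in some bucketmapjoin outputs further down) is NONE. A hedged Java sketch of that classification, inferred from the golden files rather than from Hive's implementation.]

public final class BasicStatsStateSketch {
  enum BasicStatsState { NONE, PARTIAL, COMPLETE }

  // Inferred rule: the flag reflects how many of the two basic statistics
  // are actually available for the scanned table or partition.
  static BasicStatsState classify(long numRows, long dataSize) {
    if (numRows > 0 && dataSize > 0) {
      return BasicStatsState.COMPLETE;   // e.g. numRows: 8  dataSize: 796
    }
    if (numRows <= 0 && dataSize <= 0) {
      return BasicStatsState.NONE;       // e.g. numRows: 0  dataSize: 0
    }
    return BasicStatsState.PARTIAL;      // e.g. numRows: 0  dataSize: 11624
  }
}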
condition expressions: @@ -467,6 +472,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -667,7 +674,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big @@ -752,6 +758,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 114 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -776,6 +784,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out index c5435a2..d2efff4 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_12.q.out @@ -264,6 +264,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 114 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -279,6 +281,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 170 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -294,6 +298,8 @@ STAGE PLANS: d TableScan alias: d + Statistics: + numRows: 0 dataSize: 170 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false HashTable Sink Operator condition expressions: @@ -311,6 +317,8 @@ STAGE PLANS: c TableScan alias: c + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 56f9d3d..5b1bbcf 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -81,6 +81,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -863,6 +865,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out index 241bc7f..50a382a 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out @@ -81,6 +81,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -229,6 +231,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -961,6 +965,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false 
Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 2e41dde..1495d2f 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -93,6 +93,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -241,6 +243,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -973,6 +977,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 62857a9..fa731bc 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -66,6 +66,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -210,6 +212,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -716,6 +720,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index a9f3a54..1c0020f 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -106,6 +106,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -301,6 +303,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -1212,6 +1216,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out index e7f1a40..a8e60c8 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out @@ -106,6 +106,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -301,6 +303,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted 
Merge Bucket Map Join Operator condition map: @@ -1214,6 +1218,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/binary_output_format.q.out ql/src/test/results/clientpositive/binary_output_format.q.out index 47a27ad..51a3983 100644 --- ql/src/test/results/clientpositive/binary_output_format.q.out +++ ql/src/test/results/clientpositive/binary_output_format.q.out @@ -69,6 +69,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -77,6 +79,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Transform Operator command: cat output info: @@ -90,11 +94,15 @@ STAGE PLANS: serialization.last.column.takes.rest true serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucket1.q.out ql/src/test/results/clientpositive/bucket1.q.out index f58d77a..bfce6d5 100644 --- ql/src/test/results/clientpositive/bucket1.q.out +++ ql/src/test/results/clientpositive/bucket1.q.out @@ -26,6 +26,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -34,11 +36,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator sort order: Map-reduce partition columns: expr: UDFToInteger(_col0) type: int + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -95,6 +101,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -102,11 +110,15 @@ STAGE PLANS: expr: _col1 type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucket2.q.out ql/src/test/results/clientpositive/bucket2.q.out index 27cbdd6..95a3a7a 100644 --- ql/src/test/results/clientpositive/bucket2.q.out +++ ql/src/test/results/clientpositive/bucket2.q.out @@ -26,6 +26,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -34,11 
+36,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator sort order: Map-reduce partition columns: expr: UDFToInteger(_col0) type: int + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -95,6 +101,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -102,11 +110,15 @@ STAGE PLANS: expr: _col1 type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucket3.q.out ql/src/test/results/clientpositive/bucket3.q.out index 8c716b2..bdbbf23 100644 --- ql/src/test/results/clientpositive/bucket3.q.out +++ ql/src/test/results/clientpositive/bucket3.q.out @@ -26,6 +26,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -34,11 +36,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator sort order: Map-reduce partition columns: expr: UDFToInteger(_col0) type: int + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -95,6 +101,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -102,12 +110,16 @@ STAGE PLANS: expr: _col1 type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 2 Static Partition Specification: ds=1/ + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/bucket_map_join_1.q.out ql/src/test/results/clientpositive/bucket_map_join_1.q.out index 5443a4d..90667ac 100644 --- ql/src/test/results/clientpositive/bucket_map_join_1.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_1.q.out @@ -65,6 +65,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 21 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -82,6 +84,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 20 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucket_map_join_2.q.out ql/src/test/results/clientpositive/bucket_map_join_2.q.out index 8dcaeb3..a02cc02 100644 --- 
ql/src/test/results/clientpositive/bucket_map_join_2.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_2.q.out @@ -65,6 +65,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 21 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -82,6 +84,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 20 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_1.q.out ql/src/test/results/clientpositive/bucketcontext_1.q.out index 3c89675..9261999 100644 --- ql/src/test/results/clientpositive/bucketcontext_1.q.out +++ ql/src/test/results/clientpositive/bucketcontext_1.q.out @@ -139,6 +139,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -163,6 +165,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -359,6 +363,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_2.q.out ql/src/test/results/clientpositive/bucketcontext_2.q.out index 14db71d..daa9d5d 100644 --- ql/src/test/results/clientpositive/bucketcontext_2.q.out +++ ql/src/test/results/clientpositive/bucketcontext_2.q.out @@ -127,6 +127,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -151,6 +153,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -347,6 +351,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_3.q.out ql/src/test/results/clientpositive/bucketcontext_3.q.out index ad03033..cac1e07 100644 --- ql/src/test/results/clientpositive/bucketcontext_3.q.out +++ ql/src/test/results/clientpositive/bucketcontext_3.q.out @@ -169,6 +169,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -193,6 +195,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -344,6 +348,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_4.q.out ql/src/test/results/clientpositive/bucketcontext_4.q.out index a5184b1..4b1254d 100644 --- ql/src/test/results/clientpositive/bucketcontext_4.q.out +++ ql/src/test/results/clientpositive/bucketcontext_4.q.out @@ -181,6 +181,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 
11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -205,6 +207,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -356,6 +360,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_5.q.out ql/src/test/results/clientpositive/bucketcontext_5.q.out index 7d954aa..a493151 100644 --- ql/src/test/results/clientpositive/bucketcontext_5.q.out +++ ql/src/test/results/clientpositive/bucketcontext_5.q.out @@ -69,6 +69,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -93,6 +95,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -240,6 +244,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_6.q.out ql/src/test/results/clientpositive/bucketcontext_6.q.out index 9a3af93..13fc155 100644 --- ql/src/test/results/clientpositive/bucketcontext_6.q.out +++ ql/src/test/results/clientpositive/bucketcontext_6.q.out @@ -83,6 +83,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -107,6 +109,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -301,6 +305,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_7.q.out ql/src/test/results/clientpositive/bucketcontext_7.q.out index 1876e5a..8aa6fde 100644 --- ql/src/test/results/clientpositive/bucketcontext_7.q.out +++ ql/src/test/results/clientpositive/bucketcontext_7.q.out @@ -194,6 +194,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -218,6 +220,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -416,6 +420,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketcontext_8.q.out ql/src/test/results/clientpositive/bucketcontext_8.q.out index afda011..21a3d5a 100644 --- ql/src/test/results/clientpositive/bucketcontext_8.q.out +++ ql/src/test/results/clientpositive/bucketcontext_8.q.out @@ -194,6 +194,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 
dataSize: 5500 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -218,6 +220,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -416,6 +420,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin1.q.out ql/src/test/results/clientpositive/bucketmapjoin1.q.out index 03e6cbd..74deb9d 100644 --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out @@ -44,12 +44,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '2008-04-08') type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE HashTable Sink Operator condition expressions: 0 {key} {value} @@ -71,6 +75,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -165,6 +171,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -187,12 +195,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '2008-04-08') type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -401,6 +413,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -425,6 +439,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -892,6 +908,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -916,6 +934,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin10.q.out ql/src/test/results/clientpositive/bucketmapjoin10.q.out index c50ec2e..4d8f077 100644 --- ql/src/test/results/clientpositive/bucketmapjoin10.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin10.q.out @@ -216,6 +216,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 6950 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -233,6 +235,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 6950 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin11.q.out 
ql/src/test/results/clientpositive/bucketmapjoin11.q.out index a51124a..228ba73 100644 --- ql/src/test/results/clientpositive/bucketmapjoin11.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin11.q.out @@ -222,6 +222,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 8562 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -246,6 +248,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 8562 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -540,6 +544,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 8562 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -564,6 +570,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 8562 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin12.q.out ql/src/test/results/clientpositive/bucketmapjoin12.q.out index c603933..0967d93 100644 --- ql/src/test/results/clientpositive/bucketmapjoin12.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin12.q.out @@ -148,6 +148,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -172,6 +174,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -380,6 +384,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -397,6 +403,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -510,6 +518,7 @@ STAGE PLANS: Fetch Operator limit: -1 + PREHOOK: query: SELECT /*+ MAPJOIN(b) */ count(*) FROM srcbucket_mapjoin_part_1 a JOIN srcbucket_mapjoin_part_3 b ON a.key = b.key AND a.part = '1' and b.part = '1' diff --git ql/src/test/results/clientpositive/bucketmapjoin13.q.out ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 55e77f8..cd37cb9 100644 --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -155,6 +155,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -172,6 +174,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -438,6 +442,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -462,6 +468,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -697,6 +705,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE 
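Note: the hunks above illustrate the new per-operator Statistics annotation these golden files now carry. Every operator in an EXPLAIN plan reports numRows and dataSize together with two completeness markers: basicStatsState (are the row count and size trustworthy?) and colStatsState (are column-level statistics available?). In the bucketcontext_*.q.out hunks only the on-disk size is known, so numRows stays 0 and the state is PARTIAL; in bucketmapjoin13.q.out the tables evidently carry metastore row counts, so the state is COMPLETE. A minimal sketch of how such an annotation could be modeled follows; the class, field, and enum names here are illustrative stand-ins, not the patch's actual Statistics API.

    // Illustrative sketch only; the real class lives in
    // org.apache.hadoop.hive.ql.plan.Statistics and may differ.
    public class StatsAnnotation {
      /** COMPLETE: fully known; PARTIAL: partly known; NONE: unavailable. */
      public enum State { NONE, PARTIAL, COMPLETE }

      private final long numRows;
      private final long dataSize;
      private final State basicStatsState;
      private final State colStatsState;

      public StatsAnnotation(long numRows, long dataSize, State basic, State col) {
        this.numRows = numRows;
        this.dataSize = dataSize;
        this.basicStatsState = basic;
        this.colStatsState = col;
      }

      @Override
      public String toString() {
        // Mirrors the rendering seen in the .q.out hunks.
        return "numRows: " + numRows + " dataSize: " + dataSize
            + " basicStatsState: " + basicStatsState
            + " colStatsState: " + colStatsState;
      }
    }
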
GatherStats: false HashTable Sink Operator condition expressions: @@ -721,6 +731,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -958,6 +970,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -982,6 +996,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin2.q.out ql/src/test/results/clientpositive/bucketmapjoin2.q.out index e62c279..ce6a415 100644 --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out @@ -140,6 +140,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 3062 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -164,6 +166,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -676,6 +680,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -700,6 +706,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 3062 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -1452,6 +1460,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 6124 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -1476,6 +1486,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin3.q.out ql/src/test/results/clientpositive/bucketmapjoin3.q.out index 4cb948d..e0cc4f0 100644 --- ql/src/test/results/clientpositive/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin3.q.out @@ -157,6 +157,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -181,6 +183,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 3062 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -693,6 +697,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 3062 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -717,6 +723,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin4.q.out ql/src/test/results/clientpositive/bucketmapjoin4.q.out index 2f1776b..8f75272 100644 --- ql/src/test/results/clientpositive/bucketmapjoin4.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin4.q.out @@ -115,6 +115,8 @@ STAGE PLANS: b TableScan alias: b + 
Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -139,6 +141,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -598,6 +602,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -622,6 +628,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin5.q.out ql/src/test/results/clientpositive/bucketmapjoin5.q.out index 8d979a4..4a34947 100644 --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -153,6 +153,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -177,6 +179,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -494,6 +498,7 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -690,6 +695,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -714,6 +721,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 6124 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin8.q.out ql/src/test/results/clientpositive/bucketmapjoin8.q.out index bccc51b..f5b16eb 100644 --- ql/src/test/results/clientpositive/bucketmapjoin8.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin8.q.out @@ -121,6 +121,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -145,6 +147,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -362,6 +366,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -386,6 +392,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin9.q.out ql/src/test/results/clientpositive/bucketmapjoin9.q.out index ddceb57..32c9b6b 100644 --- ql/src/test/results/clientpositive/bucketmapjoin9.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin9.q.out @@ -127,6 +127,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL 
colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -144,6 +146,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -389,6 +393,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -406,6 +412,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index 65c2a4d..b3ca688 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -123,6 +123,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -140,6 +142,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index c89a118..3058fdf 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -171,6 +171,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 6124 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -195,6 +197,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out index e55f429..011f48a 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative3.q.out @@ -131,6 +131,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -155,6 +157,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -282,6 +286,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -306,6 +312,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -435,6 +443,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -452,6 +462,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE 
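Note: across these bucket map-join outputs a consistent pattern emerges. When the metastore has no row count but the file system reports a size, the annotation shows numRows: 0 with a positive dataSize and basicStatsState: PARTIAL; when even the size is unresolved (as in the bucketmapjoin1.q.out hunks, where both numbers are 0) the state is NONE; when row counts were gathered, it is COMPLETE. A hedged sketch of the inference rule these outputs suggest; the method and parameter names are invented for illustration and are not the patch's code.

    // Sketch of the apparent state-inference rule, not the patch's literal logic.
    public final class BasicStatsStateRule {
      public enum State { NONE, PARTIAL, COMPLETE }

      /**
       * numRows and rawDataSize would come from metastore properties, where a
       * non-positive value means "not collected"; fileSize comes from HDFS.
       */
      public static State infer(long numRows, long rawDataSize, long fileSize) {
        if (numRows > 0 && rawDataSize > 0) {
          return State.COMPLETE;   // both basic stats are known
        } else if (fileSize > 0) {
          return State.PARTIAL;    // only on-disk size is known
        }
        return State.NONE;         // nothing usable
      }
    }
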
GatherStats: false Map Join Operator condition map: @@ -578,6 +590,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -595,6 +609,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -721,6 +737,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -738,6 +756,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -864,6 +884,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -881,6 +903,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -1007,6 +1031,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -1024,6 +1050,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -1150,6 +1178,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -1167,6 +1197,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -1293,6 +1325,8 @@ STAGE PLANS: r TableScan alias: r + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -1310,6 +1344,8 @@ STAGE PLANS: l TableScan alias: l + Statistics: + numRows: 0 dataSize: 4200 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/combine2_hadoop20.q.out ql/src/test/results/clientpositive/combine2_hadoop20.q.out index 0afef9b..2f2646e 100644 --- ql/src/test/results/clientpositive/combine2_hadoop20.q.out +++ ql/src/test/results/clientpositive/combine2_hadoop20.q.out @@ -219,16 +219,24 @@ STAGE PLANS: combine2 TableScan alias: combine2 + Statistics: + numRows: 12 dataSize: 14 basicStatsState: COMPLETE colStatsState: COMPLETE GatherStats: false Select Operator + Statistics: + numRows: 12 dataSize: 14 basicStatsState: COMPLETE colStatsState: COMPLETE Group By Operator aggregations: expr: count(1) bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -581,16 +589,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE 
colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ctas_hadoop20.q.out ql/src/test/results/clientpositive/ctas_hadoop20.q.out index 802a7a8..45d79a5 100644 --- ql/src/test/results/clientpositive/ctas_hadoop20.q.out +++ ql/src/test/results/clientpositive/ctas_hadoop20.q.out @@ -754,6 +754,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -762,6 +764,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -769,6 +773,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -825,7 +831,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -857,6 +867,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -891,12 +903,18 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index 8f201ee..3a88dd9 100644 --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -67,17 +67,23 @@ STAGE PLANS: dynamic_part_table TableScan alias: dynamic_part_table + Statistics: + numRows: 0 dataSize: 2 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: intcol type: int outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 2 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 2 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -112,9 +118,9 @@ STAGE PLANS: #### A masked pattern was here #### name 
default.dynamic_part_table numFiles 1 - numRows 0 + numRows -1 partition_columns partcol1/partcol2 - rawDataSize 0 + rawDataSize -1 serialization.ddl struct dynamic_part_table { i32 intcol} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -168,17 +174,23 @@ STAGE PLANS: dynamic_part_table TableScan alias: dynamic_part_table + Statistics: + numRows: 0 dataSize: 2 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: intcol type: int outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 2 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 2 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -213,9 +225,9 @@ STAGE PLANS: #### A masked pattern was here #### name default.dynamic_part_table numFiles 1 - numRows 0 + numRows -1 partition_columns partcol1/partcol2 - rawDataSize 0 + rawDataSize -1 serialization.ddl struct dynamic_part_table { i32 intcol} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -269,17 +281,23 @@ STAGE PLANS: dynamic_part_table TableScan alias: dynamic_part_table + Statistics: + numRows: 0 dataSize: 4 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: intcol type: int outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 4 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 4 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -314,9 +332,9 @@ STAGE PLANS: #### A masked pattern was here #### name default.dynamic_part_table numFiles 1 - numRows 0 + numRows -1 partition_columns partcol1/partcol2 - rawDataSize 0 + rawDataSize -1 serialization.ddl struct dynamic_part_table { i32 intcol} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/filter_join_breaktask.q.out ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 656d941..1c9d9a2 100644 --- ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -42,12 +42,16 @@ STAGE PLANS: f TableScan alias: f + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: key is not null type: boolean + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -56,6 +60,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -63,12 +69,16 @@ STAGE PLANS: m TableScan alias: m + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key is not null and value is not null) and (value <> '')) type: boolean + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE 
colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -77,6 +87,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: value @@ -176,12 +188,16 @@ STAGE PLANS: g TableScan alias: g + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value <> '') type: boolean + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: value @@ -190,6 +206,8 @@ STAGE PLANS: Map-reduce partition columns: expr: value type: string + Statistics: + numRows: 25 dataSize: 211 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: value diff --git ql/src/test/results/clientpositive/groupby_map_ppr.q.out ql/src/test/results/clientpositive/groupby_map_ppr.q.out index a8fdad1..2bc660d 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -32,6 +32,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -40,6 +42,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: key, value + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: count(DISTINCT substr(value, 5)) @@ -52,6 +56,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -62,6 +68,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -170,6 +178,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -179,11 +189,15 @@ STAGE PLANS: expr: concat(_col0, _col2) type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index 233c4c3..147c9a5 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -32,6 +32,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -40,6 +42,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: key, value + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: count(DISTINCT substr(value, 5)) 
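Note: two details in the surrounding hunks deserve attention. First, dynamic_partition_skip_default.q.out now prints numRows -1 and rawDataSize -1 in the partition properties; the likely intent is that -1 marks "never computed", so a genuinely empty partition (0 rows) is no longer conflated with one whose statistics were never gathered. Second, in combine2_hadoop20.q.out the unkeyed count(1) aggregation is annotated with numRows: 1 dataSize: 8: a global aggregate emits exactly one row, and that row holds a single bigint, i.e. 8 bytes. A small sketch of that arithmetic; the size constant is an assumption in the spirit of JavaDataModel, not quoted from the patch.

    // Sketch: output stats of a global (no GROUP BY key) aggregate.
    public final class GlobalAggregateStats {
      private static final long JAVA_LONG_SIZE = 8; // bigint payload, assumed

      /** A global aggregate such as count(1) always produces one row. */
      public static long[] estimate(int numAggregates) {
        long numRows = 1;
        long dataSize = numRows * numAggregates * JAVA_LONG_SIZE;
        return new long[] { numRows, dataSize };
      }

      public static void main(String[] args) {
        long[] s = estimate(1);
        // Prints "numRows: 1 dataSize: 8", matching combine2_hadoop20.q.out.
        System.out.println("numRows: " + s[0] + " dataSize: " + s[1]);
      }
    }
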
@@ -56,6 +60,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -68,6 +74,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col3 @@ -182,6 +190,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -195,11 +205,15 @@ STAGE PLANS: expr: UDFToInteger(_col4) type: int outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_ppr.q.out ql/src/test/results/clientpositive/groupby_ppr.q.out index d574b79..3dd382f 100644 --- ql/src/test/results/clientpositive/groupby_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -32,6 +32,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -40,6 +42,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: key, value + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: substr(key, 1, 1) @@ -50,6 +54,8 @@ STAGE PLANS: Map-reduce partition columns: expr: substr(key, 1, 1) type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -153,6 +159,8 @@ STAGE PLANS: type: string mode: complete outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -162,11 +170,15 @@ STAGE PLANS: expr: concat(_col0, _col2) type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index 54612e5..73e96de 100644 --- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -32,6 +32,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -40,6 +42,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: key, value + Statistics: + numRows: 0 dataSize: 11624 
basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: substr(key, 1, 1) @@ -52,6 +56,8 @@ STAGE PLANS: Map-reduce partition columns: expr: substr(key, 1, 1) type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 Path -> Alias: #### A masked pattern was here #### @@ -157,6 +163,8 @@ STAGE PLANS: type: string mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -170,11 +178,15 @@ STAGE PLANS: expr: UDFToInteger(_col4) type: int outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_1.q.out ql/src/test/results/clientpositive/groupby_sort_1.q.out index 137c5c9..6dc3072 100644 --- ql/src/test/results/clientpositive/groupby_sort_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_1.q.out @@ -66,12 +66,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -81,6 +85,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -88,11 +94,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -409,6 +419,8 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -417,6 +429,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -428,6 +442,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -440,6 +456,8 @@ STAGE PLANS: type: string expr: _col1 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -508,6 +526,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ 
-517,11 +537,15 @@ STAGE PLANS: expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -644,12 +668,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -659,6 +687,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -666,11 +696,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1038,12 +1072,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -1053,6 +1091,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -1060,11 +1100,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1456,12 +1500,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -1473,6 +1521,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -1482,11 +1532,15 @@ STAGE PLANS: expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A 
masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1843,6 +1897,8 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -1851,6 +1907,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -1864,6 +1922,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1880,6 +1940,8 @@ STAGE PLANS: type: int expr: _col2 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col3 @@ -1950,6 +2012,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -1961,11 +2025,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2116,12 +2184,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -2133,6 +2205,8 @@ STAGE PLANS: type: double mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2145,6 +2219,8 @@ STAGE PLANS: type: string expr: _col1 type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -2213,6 +2289,8 @@ STAGE PLANS: type: double mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2222,11 +2300,15 @@ STAGE PLANS: expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2405,12 +2487,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE 
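Note: throughout groupby_sort_1.q.out the Group By operators are annotated with the same numRows: 6 dataSize: 24 as their inputs. With colStatsState: NONE there is no NDV information to bound the number of distinct groups, so the annotator apparently falls back to carrying the parent cardinality through unchanged. A one-method sketch of that conservative fallback; names are illustrative.

    // Sketch of the conservative fallback visible in groupby_sort_1.q.out.
    final class GroupByCardinality {
      /** With no NDV available (colStatsState NONE) keep the parent cardinality. */
      static long estimateGroupByRows(long parentRows, Long ndvOfGroupingKeys) {
        if (ndvOfGroupingKeys == null) {
          return parentRows;                            // no reduction possible
        }
        return Math.min(parentRows, ndvOfGroupingKeys); // groups bounded by both
      }
    }
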
GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -2420,6 +2506,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2427,6 +2515,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: sum(_col1) @@ -2436,6 +2526,8 @@ STAGE PLANS: type: double mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2444,6 +2536,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -2510,6 +2604,8 @@ STAGE PLANS: type: double mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2517,11 +2613,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2713,12 +2813,16 @@ STAGE PLANS: null-subquery1:subq1-subquery1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -2728,6 +2832,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2735,7 +2841,11 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2743,11 +2853,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2775,12 +2889,16 @@ STAGE PLANS: null-subquery2:subq1-subquery2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string 
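Note: the Union hunks here make the merge rule visible: each branch arrives with numRows: 6 dataSize: 24, and the Union operator is annotated with their sums, numRows: 12 dataSize: 48. That is what a UNION ALL must do, since it concatenates its inputs. A one-method sketch of the rule; the class and method names are invented.

    import java.util.List;

    // Sketch: a UNION ALL concatenates its inputs, so its stats are the sums.
    public final class UnionStatsRule {
      public static long[] merge(List<long[]> branchStats) {
        long rows = 0, size = 0;
        for (long[] s : branchStats) {
          rows += s[0];
          size += s[1];
        }
        return new long[] { rows, size }; // e.g. {6,24} + {6,24} -> {12,48}
      }
    }
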
outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -2790,6 +2908,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2797,7 +2917,11 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2805,11 +2929,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3247,12 +3375,16 @@ STAGE PLANS: null-subquery2:subq1-subquery2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3262,6 +3394,8 @@ STAGE PLANS: type: double mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3270,6 +3404,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -3336,6 +3472,8 @@ STAGE PLANS: type: double mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -3343,6 +3481,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -3368,6 +3508,8 @@ STAGE PLANS: TableScan GatherStats: false Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -3375,11 +3517,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3407,12 +3553,16 @@ STAGE PLANS: null-subquery1:subq1-subquery1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE 
colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3422,6 +3572,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToDouble(_col0) @@ -3429,7 +3581,11 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -3437,11 +3593,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3900,12 +4060,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3915,6 +4079,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -3922,6 +4088,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3930,6 +4098,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: _col0 @@ -3939,12 +4109,16 @@ STAGE PLANS: subq2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3954,6 +4128,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -3961,6 +4137,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3969,6 +4147,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: _col1 @@ -4252,6 +4432,8 @@ STAGE PLANS: subq2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -4260,6 +4442,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE 
colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4271,6 +4455,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4283,6 +4469,8 @@ STAGE PLANS: type: string expr: _col1 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -4351,6 +4539,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4360,6 +4550,8 @@ STAGE PLANS: expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -4392,6 +4584,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: _col0 @@ -4403,12 +4597,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4418,6 +4616,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4425,6 +4625,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4433,6 +4635,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: _col0 @@ -4693,12 +4897,16 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4708,6 +4916,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4716,6 +4926,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -4782,6 +4994,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -4789,11 +5003,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked 
pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5005,6 +5223,8 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -5013,6 +5233,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -5026,6 +5248,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -5037,11 +5261,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5538,6 +5766,8 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -5546,6 +5776,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -5561,6 +5793,8 @@ STAGE PLANS: type: int mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -5574,11 +5808,15 @@ STAGE PLANS: expr: UDFToInteger(_col4) type: int outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6012,6 +6250,8 @@ STAGE PLANS: subq:t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -6022,6 +6262,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -6035,6 +6277,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -6046,11 +6290,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### 
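Note: throughout these hunks the Select, Reduce Output, and File Output operators repeat their parent's numbers verbatim (numRows: 6 dataSize: 24). Operators that neither filter nor aggregate preserve cardinality, so the annotation is simply propagated down the tree. A self-contained sketch of that propagation step; OperatorNode and StatsAnnotation below stand in for Hive's Operator and Statistics classes, which are not reproduced here, and the copy step is an assumption.

    // Self-contained sketch; names are stand-ins, not the patch's walker.
    final class StatsPropagation {
      static final class StatsAnnotation {
        long numRows, dataSize;
        StatsAnnotation(long r, long d) { numRows = r; dataSize = d; }
        StatsAnnotation copy() { return new StatsAnnotation(numRows, dataSize); }
      }

      interface OperatorNode {
        StatsAnnotation getStatistics();
        void setStatistics(StatsAnnotation s);
      }

      /** Select/FileSink-style operators keep the parent's stats verbatim. */
      static void annotatePassThrough(OperatorNode parent, OperatorNode child) {
        StatsAnnotation p = parent.getStatistics();
        if (p != null) {
          child.setStatistics(p.copy()); // copy so later edits don't alias
        }
      }
    }
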
NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6544,6 +6792,8 @@ STAGE PLANS: subq2:subq:t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -6554,6 +6804,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: _col0, _col3, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -6567,6 +6819,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -6578,11 +6832,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_6.q.out ql/src/test/results/clientpositive/groupby_sort_6.q.out index 222261f..4c0465c 100644 --- ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -33,17 +33,23 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '1') type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -53,6 +59,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -61,6 +69,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -76,6 +86,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -83,11 +95,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -193,17 +209,23 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '1') type: boolean 
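[Reviewer note, not part of the patch] Further down in this section the Union operators are the only places where the annotated numbers change: in groupby_sort_skew_1.q.out two branches carrying `numRows: 6 dataSize: 24 ... COMPLETE` merge into `numRows: 12 dataSize: 48 ... COMPLETE`, and in input_part7.q.out two PARTIAL branches of `dataSize: 11624` merge into `dataSize: 23248 ... PARTIAL`; every other operator in these plans passes its parent's statistics through unchanged. A standalone sketch of that merge follows — the addition is taken directly from the outputs, while keeping the weaker state on a mixed-state merge is an assumption, since the golden files only ever union branches whose states already match:

```java
// Illustrative sketch of the Union statistics arithmetic visible in this patch.
public class UnionStatsSketch {

  // Ordered weakest to strongest so an ordinal comparison picks the weaker state.
  enum State { NONE, PARTIAL, COMPLETE }

  static class Stats {
    final long numRows;
    final long dataSize;
    final State basicStatsState;

    Stats(long numRows, long dataSize, State basicStatsState) {
      this.numRows = numRows;
      this.dataSize = dataSize;
      this.basicStatsState = basicStatsState;
    }

    @Override
    public String toString() {
      return "numRows: " + numRows + " dataSize: " + dataSize
          + " basicStatsState: " + basicStatsState;
    }
  }

  // Union: add row counts and data sizes; assume the weaker state survives.
  static Stats union(Stats a, Stats b) {
    State weaker = a.basicStatsState.ordinal() <= b.basicStatsState.ordinal()
        ? a.basicStatsState : b.basicStatsState;
    return new Stats(a.numRows + b.numRows, a.dataSize + b.dataSize, weaker);
  }

  public static void main(String[] args) {
    Stats complete = new Stats(6, 24, State.COMPLETE);
    System.out.println(union(complete, complete)); // numRows: 12 dataSize: 48 ... COMPLETE
    Stats partial = new Stats(0, 11624, State.PARTIAL);
    System.out.println(union(partial, partial));   // numRows: 0 dataSize: 23248 ... PARTIAL
  }
}
```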
+ Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -213,6 +235,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -221,6 +245,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -236,6 +262,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -243,11 +271,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -362,12 +394,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -377,6 +413,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -385,6 +423,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -446,6 +486,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -453,11 +495,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 30 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out index 341bb55..d1f868f 100644 --- ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_skew_1.q.out @@ -66,12 +66,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE 
colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -81,6 +85,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -88,11 +94,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -410,6 +420,8 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -418,6 +430,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -429,6 +443,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -439,6 +455,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -507,6 +525,8 @@ STAGE PLANS: type: string mode: partials outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -543,6 +563,8 @@ STAGE PLANS: type: string expr: _col1 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -585,6 +607,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -594,11 +618,15 @@ STAGE PLANS: expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -721,12 +749,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -736,6 +768,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -743,11 +777,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 
dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1115,12 +1153,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -1130,6 +1172,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -1137,11 +1181,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1533,12 +1581,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -1550,6 +1602,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -1559,11 +1613,15 @@ STAGE PLANS: expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1921,6 +1979,8 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -1929,6 +1989,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -1942,6 +2004,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1954,6 +2018,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col3 @@ -2024,6 +2090,8 @@ STAGE PLANS: type: string mode: partials outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + 
numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -2064,6 +2132,8 @@ STAGE PLANS: type: int expr: _col2 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col3 @@ -2108,6 +2178,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2119,11 +2191,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2275,12 +2351,16 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -2292,6 +2372,8 @@ STAGE PLANS: type: double mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2302,6 +2384,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -2370,6 +2454,8 @@ STAGE PLANS: type: double mode: partials outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -2406,6 +2492,8 @@ STAGE PLANS: type: string expr: _col1 type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -2448,6 +2536,8 @@ STAGE PLANS: type: double mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2457,11 +2547,15 @@ STAGE PLANS: expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2641,12 +2735,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -2656,6 +2754,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + 
Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -2663,6 +2763,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: sum(_col1) @@ -2672,6 +2774,8 @@ STAGE PLANS: type: double mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2680,6 +2784,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -2746,6 +2852,8 @@ STAGE PLANS: type: double mode: partials outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -2778,6 +2886,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -2818,6 +2928,8 @@ STAGE PLANS: type: double mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -2825,11 +2937,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3021,12 +3137,16 @@ STAGE PLANS: null-subquery1:subq1-subquery1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3036,6 +3156,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -3043,7 +3165,11 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -3051,11 +3177,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3083,12 +3213,16 @@ STAGE PLANS: null-subquery2:subq1-subquery2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 
basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3098,6 +3232,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -3105,7 +3241,11 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -3113,11 +3253,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3556,12 +3700,16 @@ STAGE PLANS: null-subquery2:subq1-subquery2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3571,6 +3719,8 @@ STAGE PLANS: type: double mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3579,6 +3729,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -3645,6 +3797,8 @@ STAGE PLANS: type: double mode: partials outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -3677,6 +3831,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -3717,6 +3873,8 @@ STAGE PLANS: type: double mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -3724,6 +3882,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -3749,6 +3909,8 @@ STAGE PLANS: TableScan GatherStats: false Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -3756,11 +3918,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was 
here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3788,12 +3954,16 @@ STAGE PLANS: null-subquery1:subq1-subquery1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -3803,6 +3973,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToDouble(_col0) @@ -3810,7 +3982,11 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -3818,11 +3994,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 12 dataSize: 48 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4281,12 +4461,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4296,6 +4480,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4303,6 +4489,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4311,6 +4499,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: _col0 @@ -4320,12 +4510,16 @@ STAGE PLANS: subq2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4335,6 +4529,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4342,6 +4538,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4350,6 
+4548,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: _col1 @@ -4634,6 +4834,8 @@ STAGE PLANS: subq2:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -4642,6 +4844,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4653,6 +4857,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4663,6 +4869,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -4731,6 +4939,8 @@ STAGE PLANS: type: string mode: partials outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -4767,6 +4977,8 @@ STAGE PLANS: type: string expr: _col1 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col2 @@ -4809,6 +5021,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4818,6 +5032,8 @@ STAGE PLANS: expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -4850,6 +5066,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: _col0 @@ -4861,12 +5079,16 @@ STAGE PLANS: subq1:t1 TableScan alias: t1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -4876,6 +5098,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4883,6 +5107,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4891,6 +5117,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: _col0 @@ -5152,12 +5380,16 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE 
colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -5167,6 +5399,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -5175,6 +5409,8 @@ STAGE PLANS: Map-reduce partition columns: expr: rand() type: double + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -5241,6 +5477,8 @@ STAGE PLANS: type: string mode: partials outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -5273,6 +5511,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -5313,6 +5553,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -5320,11 +5562,15 @@ STAGE PLANS: expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5536,6 +5782,8 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -5544,6 +5792,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -5557,6 +5807,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -5568,11 +5820,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6069,6 +6325,8 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -6077,6 +6335,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: key, val + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -6092,6 +6352,8 @@ STAGE PLANS: type: int mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -6105,11 +6367,15 @@ STAGE PLANS: expr: 
UDFToInteger(_col4) type: int outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6543,6 +6809,8 @@ STAGE PLANS: subq:t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -6553,6 +6821,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -6566,6 +6836,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -6577,11 +6849,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -7075,6 +7351,8 @@ STAGE PLANS: subq2:subq:t2 TableScan alias: t2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -7085,6 +7363,8 @@ STAGE PLANS: expr: val type: string outputColumnNames: _col0, _col3, _col2 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -7098,6 +7378,8 @@ STAGE PLANS: type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: UDFToInteger(_col0) @@ -7109,11 +7391,15 @@ STAGE PLANS: expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 24 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input23.q.out ql/src/test/results/clientpositive/input23.q.out index e3bfa3f..e5536af 100644 --- ql/src/test/results/clientpositive/input23.q.out +++ ql/src/test/results/clientpositive/input23.q.out @@ -18,9 +18,13 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -34,14 +38,20 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter 
Operator isSamplingPred: false predicate: expr: ((ds = '2008-04-08') and (hr = '14')) type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE tag: 1 value expressions: expr: key diff --git ql/src/test/results/clientpositive/input42.q.out ql/src/test/results/clientpositive/input42.q.out index 3e2fbb2..ed5f9ae 100644 --- ql/src/test/results/clientpositive/input42.q.out +++ ql/src/test/results/clientpositive/input42.q.out @@ -18,6 +18,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -30,6 +32,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -37,6 +41,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -140,11 +146,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1198,12 +1208,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 200) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -1215,6 +1229,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1222,6 +1238,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1325,11 +1343,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1761,12 +1783,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (rand(100) < 0.1) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -1778,6 +1804,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce 
Output Operator key expressions: expr: _col0 @@ -1785,6 +1813,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1888,11 +1918,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input_part1.q.out ql/src/test/results/clientpositive/input_part1.q.out index c6300b8..2d2695c 100644 --- ql/src/test/results/clientpositive/input_part1.q.out +++ ql/src/test/results/clientpositive/input_part1.q.out @@ -31,12 +31,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 100) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(key) @@ -48,11 +52,15 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input_part2.q.out ql/src/test/results/clientpositive/input_part2.q.out index 950dd33..fb260d0 100644 --- ql/src/test/results/clientpositive/input_part2.q.out +++ ql/src/test/results/clientpositive/input_part2.q.out @@ -45,12 +45,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key < 100) and (ds = '2008-04-08')) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(key) @@ -62,11 +66,15 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -91,6 +99,8 @@ STAGE PLANS: predicate: expr: ((key < 100) and (ds = '2008-04-09')) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(key) @@ -102,11 +112,15 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was 
here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input_part7.q.out ql/src/test/results/clientpositive/input_part7.q.out index 91a9403..7bef3a0 100644 --- ql/src/test/results/clientpositive/input_part7.q.out +++ ql/src/test/results/clientpositive/input_part7.q.out @@ -28,12 +28,16 @@ STAGE PLANS: null-subquery1:a-subquery1:x TableScan alias: x + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 100) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -45,7 +49,11 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Union + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -57,6 +65,8 @@ STAGE PLANS: expr: _col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -68,6 +78,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++++ + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -81,12 +93,16 @@ STAGE PLANS: null-subquery2:a-subquery2:y TableScan alias: y + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 100) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -98,7 +114,11 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Union + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -110,6 +130,8 @@ STAGE PLANS: expr: _col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -121,6 +143,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++++ + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -224,11 +248,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/input_part9.q.out ql/src/test/results/clientpositive/input_part9.q.out index a44646d..c8fa6e2 100644 --- ql/src/test/results/clientpositive/input_part9.q.out +++ ql/src/test/results/clientpositive/input_part9.q.out @@ -18,12 
+18,16 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: key is not null type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -35,6 +39,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -42,6 +48,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -145,11 +153,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/join17.q.out ql/src/test/results/clientpositive/join17.q.out index 958bf2b..f516b40 100644 --- ql/src/test/results/clientpositive/join17.q.out +++ ql/src/test/results/clientpositive/join17.q.out @@ -26,6 +26,8 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -35,6 +37,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -44,6 +48,8 @@ STAGE PLANS: src2 TableScan alias: src2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -53,6 +59,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key diff --git ql/src/test/results/clientpositive/join26.q.out ql/src/test/results/clientpositive/join26.q.out index eb8afed..598d856 100644 --- ql/src/test/results/clientpositive/join26.q.out +++ ql/src/test/results/clientpositive/join26.q.out @@ -43,6 +43,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -58,6 +60,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -77,6 +81,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/join32.q.out ql/src/test/results/clientpositive/join32.q.out index 2228e92..17b9436 100644 --- ql/src/test/results/clientpositive/join32.q.out +++ ql/src/test/results/clientpositive/join32.q.out @@ -84,6 +84,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: 
NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -97,6 +99,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -114,6 +118,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/join32_lessSize.q.out ql/src/test/results/clientpositive/join32_lessSize.q.out index 06aa54a..14ef4cd 100644 --- ql/src/test/results/clientpositive/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -46,6 +46,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -63,6 +65,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -237,6 +241,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -560,6 +566,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -577,6 +585,8 @@ STAGE PLANS: w TableScan alias: w + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -1390,6 +1400,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -1399,6 +1411,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: value @@ -1406,6 +1420,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -1415,6 +1431,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE tag: 2 value expressions: expr: value @@ -1745,6 +1763,8 @@ STAGE PLANS: res:x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -1762,6 +1782,8 @@ STAGE PLANS: res:y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -1943,6 +1965,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -2276,6 +2300,8 @@ STAGE PLANS: res:y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -2293,6 +2319,8 @@ STAGE PLANS: res:x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE 
GatherStats: false Map Join Operator condition map: @@ -2474,6 +2502,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: diff --git ql/src/test/results/clientpositive/join33.q.out ql/src/test/results/clientpositive/join33.q.out index 2228e92..17b9436 100644 --- ql/src/test/results/clientpositive/join33.q.out +++ ql/src/test/results/clientpositive/join33.q.out @@ -84,6 +84,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -97,6 +99,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -114,6 +118,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index e2140c0..c912485 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -47,6 +47,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -64,12 +66,16 @@ STAGE PLANS: null-subquery1:subq1-subquery1:x TableScan alias: x + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 20) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -77,7 +83,11 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Union + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -126,12 +136,16 @@ STAGE PLANS: null-subquery2:subq1-subquery2:x1 TableScan alias: x1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key > 100) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -139,7 +153,11 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Union + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Map Join Operator condition map: Inner Join 0 to 1 diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out index db1d470..7e23bfd 100644 --- ql/src/test/results/clientpositive/join35.q.out +++ ql/src/test/results/clientpositive/join35.q.out @@ -49,17 +49,23 @@ STAGE PLANS: null-subquery1:subq1-subquery1:x TableScan alias: x + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 20) type: boolean + Statistics: + numRows: 0 dataSize: 5812 
basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -69,6 +75,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -77,6 +85,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -139,6 +149,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -146,6 +158,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -591,6 +605,8 @@ STAGE PLANS: TableScan GatherStats: false Union + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -599,6 +615,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: _col1 @@ -607,6 +625,8 @@ STAGE PLANS: TableScan GatherStats: false Union + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -615,6 +635,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: _col1 @@ -622,6 +644,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -631,6 +655,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -775,17 +801,23 @@ STAGE PLANS: null-subquery2:subq1-subquery2:x1 TableScan alias: x1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key > 100) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -795,6 +827,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -803,6 +837,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -865,6 +901,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 
basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -872,6 +910,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 diff --git ql/src/test/results/clientpositive/join9.q.out ql/src/test/results/clientpositive/join9.q.out index 1e462f1..3817cee 100644 --- ql/src/test/results/clientpositive/join9.q.out +++ ql/src/test/results/clientpositive/join9.q.out @@ -26,6 +26,8 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -35,6 +37,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -42,6 +46,8 @@ STAGE PLANS: src2 TableScan alias: src2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -51,6 +57,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: value diff --git ql/src/test/results/clientpositive/join_filters_overlap.q.out ql/src/test/results/clientpositive/join_filters_overlap.q.out index c8179c3..e01bcb9 100644 --- ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -29,6 +29,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -38,6 +40,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -47,12 +51,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 50) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -61,6 +69,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -70,12 +80,16 @@ STAGE PLANS: c TableScan alias: c + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 60) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -84,6 +98,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 2 value expressions: expr: key @@ -195,7 +211,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from a left outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (a.key=c.key AND a.value=60 AND c.value=60) PREHOOK: type: QUERY PREHOOK: Input: default@a @@ -238,12 +253,16 @@ STAGE PLANS: a 
TableScan alias: a + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 50) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -252,6 +271,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -261,6 +282,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -270,6 +293,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -279,12 +304,16 @@ STAGE PLANS: c TableScan alias: c + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 60) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -293,6 +322,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 2 value expressions: expr: key @@ -404,7 +435,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50) left outer join a c on (b.key=c.key AND b.value=60 AND c.value=60) PREHOOK: type: QUERY PREHOOK: Input: default@a @@ -447,12 +477,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 50) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -461,6 +495,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -470,6 +506,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -479,6 +517,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -488,12 +528,16 @@ STAGE PLANS: c TableScan alias: c + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 60) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -502,6 +546,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 2 value expressions: expr: key @@ -613,7 +659,6 @@ STAGE PLANS: Fetch Operator limit: -1 - PREHOOK: query: select * from a right outer join a b on (a.key=b.key AND a.value=50 AND b.value=50 AND 
b.value>10) left outer join a c on (b.key=c.key AND b.value=60 AND b.value>20 AND c.value=60) PREHOOK: type: QUERY PREHOOK: Input: default@a @@ -656,6 +701,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -665,6 +712,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -674,6 +723,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -683,6 +734,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -692,12 +745,16 @@ STAGE PLANS: c TableScan alias: c + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 60) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -706,6 +763,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 2 value expressions: expr: key @@ -715,12 +774,16 @@ STAGE PLANS: d TableScan alias: d + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 40) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -729,6 +792,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 3 value expressions: expr: key @@ -882,6 +947,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -891,6 +958,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -900,12 +969,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 50) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -914,6 +987,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -923,12 +998,16 @@ STAGE PLANS: c TableScan alias: c + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 60) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -937,6 +1016,8 @@ STAGE PLANS: Map-reduce partition columns: expr: 
key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 2 value expressions: expr: key @@ -946,12 +1027,16 @@ STAGE PLANS: d TableScan alias: d + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (value = 40) type: boolean + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -960,6 +1045,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 3 dataSize: 18 basicStatsState: COMPLETE colStatsState: NONE tag: 3 value expressions: expr: key diff --git ql/src/test/results/clientpositive/join_map_ppr.q.out ql/src/test/results/clientpositive/join_map_ppr.q.out index 62daebd..ed15a54 100644 --- ql/src/test/results/clientpositive/join_map_ppr.q.out +++ ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -45,6 +45,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 0 dataSize: 216 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -60,6 +62,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -79,6 +83,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -590,6 +596,8 @@ STAGE PLANS: x TableScan alias: x + Statistics: + numRows: 25 dataSize: 191 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -605,6 +613,8 @@ STAGE PLANS: y TableScan alias: y + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -624,6 +634,8 @@ STAGE PLANS: z TableScan alias: z + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/load_dyn_part8.q.out ql/src/test/results/clientpositive/load_dyn_part8.q.out index 41ac148..77b8b47 100644 --- ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -54,12 +54,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds <= '2008-04-08') type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -71,11 +75,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -101,6 +109,8 @@ STAGE PLANS: predicate: expr: (ds > '2008-04-08') type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: 
expr: key @@ -110,12 +120,16 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-12-31/ + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/louter_join_ppr.q.out ql/src/test/results/clientpositive/louter_join_ppr.q.out index 576c96e..918a7fe 100644 --- ql/src/test/results/clientpositive/louter_join_ppr.q.out +++ ql/src/test/results/clientpositive/louter_join_ppr.q.out @@ -30,12 +30,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -44,6 +48,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -53,12 +59,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -67,6 +77,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -328,12 +340,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -342,6 +358,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -353,12 +371,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -367,6 +389,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -723,12 +747,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: 
PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -737,6 +765,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -746,12 +776,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -760,6 +794,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -1113,12 +1149,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -1127,6 +1167,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -1136,12 +1178,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 10) and (key < 20)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -1150,6 +1196,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key diff --git ql/src/test/results/clientpositive/macro.q.out ql/src/test/results/clientpositive/macro.q.out index 46cd501..bb6e53b 100644 --- ql/src/test/results/clientpositive/macro.q.out +++ ql/src/test/results/clientpositive/macro.q.out @@ -54,13 +54,19 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: SIGMOID(2) type: double outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE ListSink @@ -124,13 +130,19 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: (FIXED_NUMBER() + 1) type: int outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE ListSink @@ -215,13 +227,19 @@ STAGE PLANS: Processor Tree: TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: SIMPLE_ADD(1, 9) type: int outputColumnNames: _col0 + 
Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE ListSink diff --git ql/src/test/results/clientpositive/merge3.q.out ql/src/test/results/clientpositive/merge3.q.out index 3a43e7f..5c4ab1b 100644 --- ql/src/test/results/clientpositive/merge3.q.out +++ ql/src/test/results/clientpositive/merge3.q.out @@ -74,6 +74,8 @@ STAGE PLANS: merge_src TableScan alias: merge_src + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -82,11 +84,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2403,6 +2409,8 @@ STAGE PLANS: merge_src_part TableScan alias: merge_src_part + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -2413,11 +2421,15 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4838,6 +4850,8 @@ STAGE PLANS: s:merge_src_part TableScan alias: merge_src_part + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -4848,11 +4862,15 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator sort order: Map-reduce partition columns: expr: _col2 type: string + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -4952,6 +4970,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -4961,11 +4981,15 @@ STAGE PLANS: expr: _col2 type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 2000 dataSize: 21248 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/metadataonly1.q.out ql/src/test/results/clientpositive/metadataonly1.q.out index d637353..5661649 100644 --- ql/src/test/results/clientpositive/metadataonly1.q.out +++ ql/src/test/results/clientpositive/metadataonly1.q.out @@ -21,20 +21,28 @@ STAGE PLANS: test1 TableScan alias: test1 + Statistics: + 
numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Group By Operator aggregations: expr: max(ds) bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -47,16 +55,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -112,20 +126,28 @@ STAGE PLANS: test1 TableScan alias: test1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: max(ds) bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -179,16 +201,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -239,12 +267,16 @@ STAGE PLANS: test1 TableScan alias: test1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: count(DISTINCT ds) @@ -254,11 +286,15 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator key expressions: expr: _col0 type: string sort order: + + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col1 @@ -312,16 +348,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + 
Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -372,20 +414,28 @@ STAGE PLANS: test1 TableScan alias: test1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: count(ds) bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -439,16 +489,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -510,20 +566,28 @@ STAGE PLANS: b:test1 TableScan alias: test1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: max(ds) bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col0 @@ -615,11 +679,15 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -652,10 +720,14 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: 1 a2 TableScan alias: a2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Reduce Output Operator key expressions: @@ -665,6 +737,8 @@ STAGE PLANS: Map-reduce partition columns: expr: ds type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: 0 Path -> Alias: #### A masked pattern was here #### @@ -932,6 +1006,8 @@ STAGE PLANS: test2 TableScan alias: test2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: @@ 
-940,6 +1016,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: ds, hr + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: count(DISTINCT hr) @@ -951,6 +1029,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator key expressions: expr: _col0 @@ -961,6 +1041,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col2 @@ -1096,6 +1178,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -1103,11 +1187,15 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1162,6 +1250,8 @@ STAGE PLANS: test2 TableScan alias: test2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: @@ -1170,6 +1260,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: ds, hr + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: count(hr) @@ -1179,6 +1271,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator key expressions: expr: _col0 @@ -1187,6 +1281,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col1 @@ -1320,6 +1416,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -1327,11 +1425,15 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1385,20 +1487,28 @@ STAGE PLANS: test1 TableScan alias: test1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: max(ds) bucketGroup: false mode: hash outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: 
COMPLETE tag: -1 value expressions: expr: _col0 @@ -1490,16 +1600,22 @@ STAGE PLANS: bucketGroup: false mode: mergepartial outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1603,6 +1719,8 @@ STAGE PLANS: test2 TableScan alias: test2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE GatherStats: false Select Operator expressions: @@ -1611,6 +1729,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: ds, hr + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Group By Operator aggregations: expr: count(DISTINCT hr) @@ -1622,6 +1742,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Reduce Output Operator key expressions: expr: _col0 @@ -1632,6 +1754,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE tag: -1 value expressions: expr: _col2 @@ -1845,6 +1969,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -1852,11 +1978,15 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/outer_join_ppr.q.out ql/src/test/results/clientpositive/outer_join_ppr.q.out index 96992bd..e5da9cd 100644 --- ql/src/test/results/clientpositive/outer_join_ppr.q.out +++ ql/src/test/results/clientpositive/outer_join_ppr.q.out @@ -30,6 +30,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -39,6 +41,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -48,6 +52,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -57,6 +63,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -415,6 +423,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: 
@@ -424,6 +434,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -433,6 +445,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -442,6 +456,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 046e715..4c5d570 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -75,12 +75,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 5) type: boolean + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -90,6 +94,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -97,6 +103,8 @@ STAGE PLANS: expr: _col2 type: string sort order: ++ + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -196,11 +204,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -274,12 +286,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ds <= '2000-04-09') or (key < 5)) type: boolean + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -287,11 +303,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 type: int sort order: + + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -431,11 +451,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -546,12 +570,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE 
GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key < 5) and (value <> 'val_2')) type: boolean + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -561,6 +589,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -568,6 +598,8 @@ STAGE PLANS: expr: _col2 type: string sort order: ++ + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -667,11 +699,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -747,12 +783,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (((ds < '2000-04-09') and (key < 5)) or ((ds > '2000-04-09') and (value = 'val_5'))) type: boolean + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -762,6 +802,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -769,6 +811,8 @@ STAGE PLANS: expr: _col2 type: string sort order: ++ + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -868,11 +912,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -950,12 +998,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (((ds < '2000-04-10') and (key < 5)) or ((ds > '2000-04-08') and (value = 'val_5'))) type: boolean + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -965,6 +1017,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -972,6 +1026,8 @@ STAGE PLANS: expr: _col2 type: string sort order: ++ + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1113,11 +1169,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 60 dataSize: 480 
basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1205,12 +1265,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (((ds < '2000-04-10') or (key < 5)) and ((ds > '2000-04-08') or (value = 'val_5'))) type: boolean + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -1220,6 +1284,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1227,6 +1293,8 @@ STAGE PLANS: expr: _col2 type: string sort order: ++ + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1368,11 +1436,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1468,12 +1540,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key = 14) type: boolean + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -1481,6 +1557,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1488,6 +1566,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1585,11 +1665,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1653,6 +1737,8 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -1661,6 +1747,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1668,6 +1756,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 40 
dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1765,11 +1855,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1873,6 +1967,8 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -1881,6 +1977,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1888,6 +1986,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -2027,11 +2127,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2157,12 +2261,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) type: boolean + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -2172,6 +2280,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2181,6 +2291,8 @@ STAGE PLANS: expr: _col2 type: string sort order: +++ + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -2280,11 +2392,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2350,6 +2466,8 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -2359,6 +2477,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -2370,6 +2490,8 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 20 
dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -2379,6 +2501,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -2634,6 +2758,8 @@ STAGE PLANS: t1 TableScan alias: t1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -2643,6 +2769,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -2654,6 +2782,8 @@ STAGE PLANS: t2 TableScan alias: t2 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -2663,6 +2793,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: int + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -2979,12 +3111,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (((ds > '2000-04-08') and (ds < '2000-04-11')) or (key = 2)) type: boolean + Statistics: + numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -2994,6 +3130,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3003,6 +3141,8 @@ STAGE PLANS: expr: _col2 type: string sort order: +++ + Statistics: + numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -3186,11 +3326,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 80 dataSize: 640 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3304,12 +3448,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ds > '2000-04-08') or ((ds <= '2000-04-09') and (key = 2))) type: boolean + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -3319,6 +3467,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3328,6 +3478,8 @@ STAGE PLANS: expr: _col2 type: string sort order: +++ + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -3469,11 +3621,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE 
File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 60 dataSize: 480 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3629,6 +3785,8 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -3637,11 +3795,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3668,11 +3830,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4167,12 +4333,16 @@ STAGE PLANS: pcr_t1 TableScan alias: pcr_t1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key = 2) type: boolean + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -4180,11 +4350,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4214,6 +4388,8 @@ STAGE PLANS: predicate: expr: (key = 3) type: boolean + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -4221,11 +4397,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 20 dataSize: 160 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4784,6 +4964,8 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -4792,11 +4974,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 type: string sort order: + + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL 
colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -4853,12 +5039,18 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -4938,12 +5130,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key = 11) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -4955,6 +5151,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -4964,6 +5162,8 @@ STAGE PLANS: expr: _col3 type: string sort order: +++ + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -5067,11 +5267,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5157,12 +5361,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key = 11) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -5174,6 +5382,8 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -5183,6 +5393,8 @@ STAGE PLANS: expr: _col3 type: string sort order: +++ + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -5286,11 +5498,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/ppd_join_filter.q.out ql/src/test/results/clientpositive/ppd_join_filter.q.out index 063ee18..4793931 100644 --- ql/src/test/results/clientpositive/ppd_join_filter.q.out +++ ql/src/test/results/clientpositive/ppd_join_filter.q.out @@ -39,12 +39,16 @@ STAGE PLANS: 
b:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: min(key) @@ -54,6 +58,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -62,6 +68,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -124,6 +132,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -135,11 +145,15 @@ STAGE PLANS: expr: (_col1 + 3) type: double outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col2 < 5.0) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -172,6 +186,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: _col3 @@ -181,6 +197,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -190,6 +208,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -382,12 +402,16 @@ STAGE PLANS: b:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: min(key) @@ -397,6 +421,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -405,6 +431,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -467,6 +495,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -478,11 +508,15 @@ STAGE PLANS: expr: (_col1 + 3) type: double outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col2 < 5.0) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -515,6 +549,8 @@ STAGE PLANS: 
Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: _col3 @@ -524,6 +560,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -533,6 +571,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -725,12 +765,16 @@ STAGE PLANS: b:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: min(key) @@ -740,6 +784,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -748,6 +794,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -810,6 +858,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -821,11 +871,15 @@ STAGE PLANS: expr: (_col1 + 3) type: double outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col2 < 5.0) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -858,6 +912,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: _col3 @@ -867,6 +923,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -876,6 +934,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -1068,12 +1128,16 @@ STAGE PLANS: b:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Group By Operator aggregations: expr: min(key) @@ -1083,6 +1147,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1091,6 +1157,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -1153,6 +1221,8 @@ STAGE PLANS: type: string 
mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -1164,11 +1234,15 @@ STAGE PLANS: expr: (_col1 + 3) type: double outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col2 < 5.0) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -1201,6 +1275,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: _col3 @@ -1210,6 +1286,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -1219,6 +1297,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key diff --git ql/src/test/results/clientpositive/ppd_union_view.q.out ql/src/test/results/clientpositive/ppd_union_view.q.out index c571105..e457755 100644 --- ql/src/test/results/clientpositive/ppd_union_view.q.out +++ ql/src/test/results/clientpositive/ppd_union_view.q.out @@ -201,6 +201,8 @@ STAGE PLANS: t1-subquery2:subq-subquery2:t1_mapping TableScan alias: t1_mapping + Statistics: + numRows: 1 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -214,6 +216,8 @@ STAGE PLANS: type: string expr: ds type: string + Statistics: + numRows: 1 dataSize: 12 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: key @@ -221,6 +225,8 @@ STAGE PLANS: t1-subquery2:subq-subquery2:t1_old TableScan alias: t1_old + Statistics: + numRows: 1 dataSize: 14 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -234,6 +240,8 @@ STAGE PLANS: type: string expr: ds type: string + Statistics: + numRows: 1 dataSize: 14 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: value @@ -404,12 +412,16 @@ STAGE PLANS: t1-subquery1:subq-subquery1:t1_new TableScan alias: t1_new + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '2011-10-13') type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: key @@ -419,6 +431,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Union Select Operator expressions: @@ -575,12 +589,16 @@ STAGE PLANS: t1-subquery2:subq-subquery2:t1_mapping TableScan alias: t1_mapping + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '2011-10-15') type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator key expressions: expr: keymap @@ -593,6 +611,8 @@ STAGE PLANS: type: string expr: ds type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: 
NONE tag: 1 value expressions: expr: key @@ -600,12 +620,16 @@ STAGE PLANS: t1-subquery2:subq-subquery2:t1_old TableScan alias: t1_old + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (ds = '2011-10-15') type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator key expressions: expr: keymap @@ -618,6 +642,8 @@ STAGE PLANS: type: string expr: ds type: string + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE tag: 0 value expressions: expr: value @@ -700,6 +726,8 @@ STAGE PLANS: t1-subquery1:subq-subquery1:t1_new TableScan alias: t1_new + Statistics: + numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Select Operator expressions: @@ -710,6 +738,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 1 dataSize: 11 basicStatsState: COMPLETE colStatsState: NONE Union Select Operator expressions: diff --git ql/src/test/results/clientpositive/ppd_vc.q.out ql/src/test/results/clientpositive/ppd_vc.q.out index f3cc2e9..a12af6f 100644 --- ql/src/test/results/clientpositive/ppd_vc.q.out +++ ql/src/test/results/clientpositive/ppd_vc.q.out @@ -22,12 +22,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (BLOCK__OFFSET__INSIDE__FILE < 100) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -39,11 +43,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -318,6 +326,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Reduce Output Operator key expressions: @@ -327,16 +337,22 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 b:srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((BLOCK__OFFSET__INSIDE__FILE < 100) and (BLOCK__OFFSET__INSIDE__FILE < 50)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -350,6 +366,8 @@ STAGE PLANS: expr: BLOCK__OFFSET__INSIDE__FILE type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -358,6 +376,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: _col0 diff --git 
ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out index d2dcd9c..9b3a205 100644 --- ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out +++ ql/src/test/results/clientpositive/ppr_allchildsarenull.q.out @@ -30,12 +30,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(key) @@ -43,11 +47,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -215,12 +223,16 @@ STAGE PLANS: srcpart TableScan alias: srcpart + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ds = '2008-04-08') and (CASE WHEN (((value like 'aaa%') or (value like 'vvv%'))) THEN (1) ELSE (0) END > 0)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(key) @@ -228,11 +240,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/push_or.q.out ql/src/test/results/clientpositive/push_or.q.out index 43224e1..9f996e2 100644 --- ql/src/test/results/clientpositive/push_or.q.out +++ ql/src/test/results/clientpositive/push_or.q.out @@ -51,12 +51,16 @@ STAGE PLANS: push_or TableScan alias: push_or + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ds = '2000-04-09') or (key = 5)) type: boolean + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -66,6 +70,8 @@ STAGE PLANS: expr: ds type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -73,6 +79,8 @@ STAGE PLANS: expr: _col2 type: string sort order: ++ + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -172,11 +180,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### 
NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 320 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/rand_partitionpruner1.q.out ql/src/test/results/clientpositive/rand_partitionpruner1.q.out index 752a2c1..6cc85ef 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner1.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner1.q.out @@ -18,12 +18,16 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (rand(1) < 0.1) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -31,11 +35,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/rand_partitionpruner2.q.out ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index ba6e78c..ecbe27e 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -35,12 +35,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (rand(1) < 0.1) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -52,11 +56,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/rand_partitionpruner3.q.out ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 989151d..e714d6d 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -20,12 +20,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((rand(1) < 0.1) and (not ((key > 50) or (key < 10)))) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -37,11 +41,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: 
+ numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -145,12 +153,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (not ((key > 50) or (key < 10))) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -162,11 +174,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/regexp_extract.q.out ql/src/test/results/clientpositive/regexp_extract.q.out index 89bda11..011a204 100644 --- ql/src/test/results/clientpositive/regexp_extract.q.out +++ ql/src/test/results/clientpositive/regexp_extract.q.out @@ -30,6 +30,8 @@ STAGE PLANS: tmap:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -42,6 +44,8 @@ STAGE PLANS: expr: (3 + 4) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Transform Operator command: cat output info: @@ -55,11 +59,15 @@ STAGE PLANS: serialization.last.column.takes.rest true serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col0 < 100) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -68,6 +76,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -124,6 +134,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -131,11 +143,15 @@ STAGE PLANS: expr: regexp_extract(_col1, 'val_(\d+\t\d+)', 1) type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -293,6 +309,8 @@ STAGE PLANS: tmap:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -305,6 +323,8 @@ STAGE PLANS: expr: (3 + 4) type: int outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 5812 
basicStatsState: PARTIAL colStatsState: NONE Transform Operator command: cat output info: @@ -318,11 +338,15 @@ STAGE PLANS: serialization.last.column.takes.rest true serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col0 < 100) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -331,6 +355,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -387,6 +413,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -394,11 +422,15 @@ STAGE PLANS: expr: regexp_extract(_col1, 'val_(\d+\t\d+)') type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/router_join_ppr.q.out ql/src/test/results/clientpositive/router_join_ppr.q.out index c2cd589..1b637e1 100644 --- ql/src/test/results/clientpositive/router_join_ppr.q.out +++ ql/src/test/results/clientpositive/router_join_ppr.q.out @@ -30,12 +30,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -44,6 +48,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -53,12 +59,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -67,6 +77,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -425,12 +437,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -439,6 +455,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 11624 
basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -448,12 +466,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -462,6 +484,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -723,12 +747,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -737,6 +765,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -746,12 +776,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -760,6 +794,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key @@ -1021,12 +1057,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -1035,6 +1075,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -1046,12 +1088,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((key > 15) and (key < 25)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -1060,6 +1106,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key diff --git ql/src/test/results/clientpositive/sample1.q.out ql/src/test/results/clientpositive/sample1.q.out index 789708e..f94b40c 100644 --- ql/src/test/results/clientpositive/sample1.q.out +++ ql/src/test/results/clientpositive/sample1.q.out @@ -35,12 +35,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator 
isSamplingPred: true predicate: expr: (((hash(rand()) & 2147483647) % 1) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: UDFToInteger(key) @@ -52,11 +56,15 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample10.q.out ql/src/test/results/clientpositive/sample10.q.out index 3ca86a3..8aa8baa 100644 --- ql/src/test/results/clientpositive/sample10.q.out +++ ql/src/test/results/clientpositive/sample10.q.out @@ -63,17 +63,23 @@ STAGE PLANS: srcpartbucket TableScan alias: srcpartbucket + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 4) = 0) type: boolean + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: ds type: string outputColumnNames: ds + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -83,6 +89,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -91,6 +99,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -290,6 +300,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -297,6 +309,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -326,6 +340,8 @@ STAGE PLANS: expr: _col0 type: string sort order: + + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -360,11 +376,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 40 dataSize: 240 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample2.q.out ql/src/test/results/clientpositive/sample2.q.out index 35fb40f..ca33564a 100644 --- ql/src/test/results/clientpositive/sample2.q.out +++ ql/src/test/results/clientpositive/sample2.q.out @@ -35,12 +35,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator 
isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 2) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -48,11 +52,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample4.q.out ql/src/test/results/clientpositive/sample4.q.out index 6516091..3c41565 100644 --- ql/src/test/results/clientpositive/sample4.q.out +++ ql/src/test/results/clientpositive/sample4.q.out @@ -35,12 +35,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 2) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -48,11 +52,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample5.q.out ql/src/test/results/clientpositive/sample5.q.out index 12ccb42..5391397 100644 --- ql/src/test/results/clientpositive/sample5.q.out +++ ql/src/test/results/clientpositive/sample5.q.out @@ -33,12 +33,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 5) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -46,11 +50,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample6.q.out ql/src/test/results/clientpositive/sample6.q.out index b1223df..6c363f7 100644 --- ql/src/test/results/clientpositive/sample6.q.out +++ ql/src/test/results/clientpositive/sample6.q.out @@ -33,12 +33,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 4) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator 
expressions: expr: key @@ -46,11 +50,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -589,12 +597,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 4) = 3) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -602,6 +614,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -609,6 +623,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -667,11 +683,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -966,12 +986,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 2) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -979,6 +1003,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -986,6 +1012,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1044,11 +1072,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1597,12 +1629,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 3) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -1610,6 +1646,8 @@ STAGE PLANS: expr: value 
type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -1617,6 +1655,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -1675,11 +1715,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2071,12 +2115,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 3) = 1) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -2084,6 +2132,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2091,6 +2141,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -2149,11 +2201,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2531,12 +2587,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 2) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -2544,6 +2604,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2551,6 +2613,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -2654,11 +2718,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2836,12 +2904,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + 
numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 4) = 1) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -2849,6 +2921,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -2856,6 +2930,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -2914,11 +2990,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -3019,12 +3099,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 2) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Select Operator expressions: expr: key @@ -3032,6 +3116,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -3039,6 +3125,8 @@ STAGE PLANS: expr: _col1 type: string sort order: ++ + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -3048,11 +3136,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample7.q.out ql/src/test/results/clientpositive/sample7.q.out index 13d0c0a..77d9668 100644 --- ql/src/test/results/clientpositive/sample7.q.out +++ ql/src/test/results/clientpositive/sample7.q.out @@ -35,12 +35,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((((hash(key) & 2147483647) % 4) = 0) and (key > 100)) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -48,11 +52,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern 
was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/sample8.q.out ql/src/test/results/clientpositive/sample8.q.out index 51ba763..be1ac2a 100644 --- ql/src/test/results/clientpositive/sample8.q.out +++ ql/src/test/results/clientpositive/sample8.q.out @@ -31,12 +31,16 @@ STAGE PLANS: s TableScan alias: s + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: ((((hash(key) & 2147483647) % 10) = 0) and (((hash(key) & 2147483647) % 1) = 0)) type: boolean + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -49,6 +53,8 @@ STAGE PLANS: type: string expr: value type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE tag: 0 value expressions: expr: key @@ -62,12 +68,16 @@ STAGE PLANS: t TableScan alias: t + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: ((((hash(key) & 2147483647) % 1) = 0) and (((hash(key) & 2147483647) % 10) = 0)) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -80,6 +90,8 @@ STAGE PLANS: type: string expr: value type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: 1 value expressions: expr: key diff --git ql/src/test/results/clientpositive/sample9.q.out ql/src/test/results/clientpositive/sample9.q.out index 37c1fba..ee3a81c 100644 --- ql/src/test/results/clientpositive/sample9.q.out +++ ql/src/test/results/clientpositive/sample9.q.out @@ -20,12 +20,16 @@ STAGE PLANS: s:a TableScan alias: a + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: expr: (((hash(key) & 2147483647) % 2) = 0) type: boolean + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -33,11 +37,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 11603 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/serde_user_properties.q.out ql/src/test/results/clientpositive/serde_user_properties.q.out index 9eb1d11..3588e6a 100644 --- ql/src/test/results/clientpositive/serde_user_properties.q.out +++ ql/src/test/results/clientpositive/serde_user_properties.q.out @@ -20,17 +20,23 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### 
A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -117,17 +123,23 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -214,17 +226,23 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -296,6 +314,7 @@ STAGE PLANS: Fetch Operator limit: -1 + PREHOOK: query: explain extended select key from src ('user.defined.key'='some.value') PREHOOK: type: QUERY POSTHOOK: query: explain extended select key from src ('user.defined.key'='some.value') @@ -314,17 +333,23 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -413,17 +438,23 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -516,17 +547,23 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -615,17 +652,23 @@ STAGE PLANS: a TableScan alias: a + 
Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin9.q.out ql/src/test/results/clientpositive/smb_mapjoin9.q.out index 074a60d..18ea36e 100644 --- ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -36,12 +36,16 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ds = '2010-10-15') and key is not null) type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -133,12 +137,16 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((ds = '2010-10-15') and key is not null) type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 diff --git ql/src/test/results/clientpositive/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index e60e67d..8a50cd1 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -70,6 +70,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 96d285f..b1b3e32 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -90,6 +90,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -310,6 +312,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 3084 dataSize: 32904 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index bff9566..f581ae7 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -86,6 +86,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -271,6 +273,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition 
expressions: @@ -288,6 +292,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/smb_mapjoin_15.q.out index fda6b75..e652916 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_15.q.out @@ -60,6 +60,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -304,6 +306,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -496,6 +500,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Sorted Merge Bucket Map Join Operator condition map: @@ -693,6 +699,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -717,6 +725,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 7218 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index e60565f..491d7c6 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -127,6 +127,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -151,6 +153,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 500 dataSize: 5312 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index d0ed79e..bcf5b21 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -232,6 +232,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -249,6 +251,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 1000 dataSize: 10624 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/stats0.q.out ql/src/test/results/clientpositive/stats0.q.out index 2bef610..7958a7d 100644 --- ql/src/test/results/clientpositive/stats0.q.out +++ ql/src/test/results/clientpositive/stats0.q.out @@ -26,6 +26,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -34,11 +36,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL 
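
The three states visible in the hunks so far track how much is known: the smb_mapjoin9 scans carry numRows: 0 dataSize: 0 and read NONE, the src-based scans carry a known dataSize with an unknown row count and read PARTIAL, and the bucketed-table scans such as numRows: 500 dataSize: 5312 read COMPLETE. Likewise, colStatsState appears to flip to COMPLETE only where column statistics exist (stats12.q.out, stats13.q.out). A sketch of that apparent rule, inferred from these outputs rather than from Hive's source, reusing the StatsAnnotation sketch above:

    static StatsAnnotation.State inferBasicState(long numRows, long dataSize) {
      if (numRows > 0 && dataSize > 0) {
        return StatsAnnotation.State.COMPLETE;  // e.g. numRows: 500 dataSize: 5312
      }
      if (dataSize > 0) {
        return StatsAnnotation.State.PARTIAL;   // e.g. numRows: 0 dataSize: 5812
      }
      return StatsAnnotation.State.NONE;        // e.g. numRows: 0 dataSize: 0
    }
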
colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1367,6 +1373,8 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -1375,11 +1383,15 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/stats11.q.out ql/src/test/results/clientpositive/stats11.q.out index db75be8..f3697b2 100644 --- ql/src/test/results/clientpositive/stats11.q.out +++ ql/src/test/results/clientpositive/stats11.q.out @@ -348,6 +348,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -372,6 +374,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: @@ -839,6 +843,8 @@ STAGE PLANS: a TableScan alias: a + Statistics: + numRows: 0 dataSize: 2750 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false HashTable Sink Operator condition expressions: @@ -863,6 +869,8 @@ STAGE PLANS: b TableScan alias: b + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Map Join Operator condition map: diff --git ql/src/test/results/clientpositive/stats12.q.out ql/src/test/results/clientpositive/stats12.q.out index ee04470..287b865 100644 --- ql/src/test/results/clientpositive/stats12.q.out +++ ql/src/test/results/clientpositive/stats12.q.out @@ -58,6 +58,8 @@ STAGE PLANS: analyze_srcpart TableScan alias: analyze_srcpart + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: COMPLETE Statistics Aggregation Key Prefix: analyze_srcpart/ GatherStats: true Path -> Alias: diff --git ql/src/test/results/clientpositive/stats13.q.out ql/src/test/results/clientpositive/stats13.q.out index 837966c..699fda2 100644 --- ql/src/test/results/clientpositive/stats13.q.out +++ ql/src/test/results/clientpositive/stats13.q.out @@ -58,6 +58,8 @@ STAGE PLANS: analyze_srcpart TableScan alias: analyze_srcpart + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: COMPLETE Statistics Aggregation Key Prefix: analyze_srcpart/ GatherStats: true Path -> Alias: diff --git ql/src/test/results/clientpositive/transform_ppr1.q.out ql/src/test/results/clientpositive/transform_ppr1.q.out index 70ff143..d6b96c2 100644 --- ql/src/test/results/clientpositive/transform_ppr1.q.out +++ ql/src/test/results/clientpositive/transform_ppr1.q.out @@ -30,6 +30,8 @@ STAGE PLANS: tmap:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -40,6 +42,8 @@ STAGE PLANS: expr: 
value type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Transform Operator command: cat output info: @@ -52,11 +56,15 @@ STAGE PLANS: serialization.format 9 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: ((_col1 < 100) and (_col0 = '2008-04-08')) type: boolean + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col1 @@ -65,6 +73,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col1 type: string + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -252,6 +262,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col1 @@ -259,11 +271,15 @@ STAGE PLANS: expr: _col2 type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/transform_ppr2.q.out ql/src/test/results/clientpositive/transform_ppr2.q.out index 4e9824c..03a9cdf 100644 --- ql/src/test/results/clientpositive/transform_ppr2.q.out +++ ql/src/test/results/clientpositive/transform_ppr2.q.out @@ -32,6 +32,8 @@ STAGE PLANS: tmap:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -42,6 +44,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Transform Operator command: cat output info: @@ -54,11 +58,15 @@ STAGE PLANS: serialization.format 9 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Filter Operator isSamplingPred: false predicate: expr: (_col1 < 100) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col1 @@ -67,6 +75,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col1 type: string + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -168,6 +178,8 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col1 @@ -175,11 +187,15 @@ STAGE PLANS: expr: _col2 type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + 
numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/udf_explode.q.out ql/src/test/results/clientpositive/udf_explode.q.out index 0787984..89a0582 100644 --- ql/src/test/results/clientpositive/udf_explode.q.out +++ ql/src/test/results/clientpositive/udf_explode.q.out @@ -27,19 +27,27 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: array(1,2,3) type: array outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE function name: explode File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -127,19 +135,27 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: array(1,2,3) type: array outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE function name: explode Select Operator expressions: expr: col type: int outputColumnNames: col + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Group By Operator aggregations: expr: count(1) @@ -149,6 +165,8 @@ STAGE PLANS: type: int mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Reduce Output Operator key expressions: expr: _col0 @@ -157,6 +175,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: int + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE tag: -1 value expressions: expr: _col1 @@ -219,6 +239,8 @@ STAGE PLANS: type: int mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -226,11 +248,15 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -304,19 +330,27 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: map(1:'one',2:'two',3:'three') type: map outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL 
colStatsState: COMPLETE function name: explode File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -404,13 +438,19 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: map(1:'one',2:'two',3:'three') type: map outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE function name: explode Select Operator expressions: @@ -419,6 +459,8 @@ STAGE PLANS: expr: value type: string outputColumnNames: key, value + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Group By Operator aggregations: expr: count(1) @@ -430,6 +472,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Reduce Output Operator key expressions: expr: _col0 @@ -442,6 +486,8 @@ STAGE PLANS: type: int expr: _col1 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE tag: -1 value expressions: expr: _col2 @@ -506,6 +552,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -515,11 +563,15 @@ STAGE PLANS: expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/udf_java_method.q.out ql/src/test/results/clientpositive/udf_java_method.q.out index 022096b..8548b50 100644 --- ql/src/test/results/clientpositive/udf_java_method.q.out +++ ql/src/test/results/clientpositive/udf_java_method.q.out @@ -49,6 +49,8 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: @@ -67,6 +69,8 @@ STAGE PLANS: expr: reflect('java.lang.Math','floor',1.9) type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE ListSink diff --git ql/src/test/results/clientpositive/udf_reflect.q.out ql/src/test/results/clientpositive/udf_reflect.q.out index 18b96f3..5d86d2d 100644 --- ql/src/test/results/clientpositive/udf_reflect.q.out +++ ql/src/test/results/clientpositive/udf_reflect.q.out @@ -47,6 +47,8 @@ STAGE PLANS: TableScan alias: src Row Limit Per Split: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -67,6 +69,8 @@ STAGE PLANS: expr: 
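
Within any single plan in these files the numbers never change from operator to operator: the TableScan's annotation is repeated verbatim on the Select, UDTF, Group By, Limit and File Output operators below it. That is consistent with an annotation walk that, lacking an operator-specific estimator, copies the parent's statistics onto each child. A sketch of such a walk; the Op interface here is illustrative (the real operator class lives in org.apache.hadoop.hive.ql.exec), and the traversal is an assumption, not Hive's actual rule dispatch:

    interface Op {
      java.util.List<Op> getChildren();
      StatsAnnotation getStatistics();
      void setStatistics(StatsAnnotation stats);
    }

    static void annotate(Op op) {
      for (Op child : op.getChildren()) {
        if (child.getStatistics() == null) {
          // No specialized estimator: inherit the parent's annotation. Sharing
          // is safe here only because the sketched StatsAnnotation is immutable.
          child.setStatistics(op.getStatistics());
        }
        annotate(child);
      }
    }
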
reflect('java.lang.Integer','valueOf',key,16) type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE ListSink diff --git ql/src/test/results/clientpositive/udf_reflect2.q.out ql/src/test/results/clientpositive/udf_reflect2.q.out index fc50bd6..36505f9 100644 --- ql/src/test/results/clientpositive/udf_reflect2.q.out +++ ql/src/test/results/clientpositive/udf_reflect2.q.out @@ -92,6 +92,8 @@ STAGE PLANS: a:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Select Operator expressions: @@ -102,6 +104,8 @@ STAGE PLANS: expr: CAST( '2013-02-15 19:41:20' AS TIMESTAMP) type: timestamp outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -167,12 +171,18 @@ STAGE PLANS: expr: reflect2(_col2,'getTime') type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/udtf_explode.q.out ql/src/test/results/clientpositive/udtf_explode.q.out index 36b3727..b19f15d 100644 --- ql/src/test/results/clientpositive/udtf_explode.q.out +++ ql/src/test/results/clientpositive/udtf_explode.q.out @@ -26,20 +26,30 @@ STAGE PLANS: src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: array(1,2,3) type: array outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE function name: explode Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -127,17 +137,27 @@ STAGE PLANS: a:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: array(1,2,3) type: array outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE function name: explode Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 5812 
basicStatsState: PARTIAL colStatsState: COMPLETE tag: -1 value expressions: expr: col @@ -192,12 +212,18 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Select Operator expressions: expr: _col0 type: int outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Group By Operator aggregations: expr: count(1) @@ -207,6 +233,8 @@ STAGE PLANS: type: int mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -239,6 +267,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: int + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE tag: -1 value expressions: expr: _col1 @@ -279,6 +309,8 @@ STAGE PLANS: type: int mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -286,11 +318,15 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -403,17 +439,27 @@ STAGE PLANS: a:src TableScan alias: src + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE GatherStats: false Select Operator expressions: expr: map(1:'one',2:'two',3:'three') type: map outputColumnNames: _col0 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE UDTF Operator + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE function name: explode Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Reduce Output Operator sort order: + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE tag: -1 value expressions: expr: key @@ -470,7 +516,11 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Limit + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -478,6 +528,8 @@ STAGE PLANS: expr: _col1 type: string outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Group By Operator aggregations: expr: count(1) @@ -489,6 +541,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -525,6 +579,8 @@ STAGE PLANS: type: int expr: _col1 type: string + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE tag: -1 value expressions: expr: _col2 @@ -567,6 +623,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: 
_col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE Select Operator expressions: expr: _col0 @@ -576,11 +634,15 @@ STAGE PLANS: expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 5812 basicStatsState: PARTIAL colStatsState: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/union22.q.out ql/src/test/results/clientpositive/union22.q.out index c7ffcdb..01a5fe3 100644 --- ql/src/test/results/clientpositive/union22.q.out +++ ql/src/test/results/clientpositive/union22.q.out @@ -373,12 +373,16 @@ STAGE PLANS: null-subquery1:subq-subquery1:dst_union22_delta TableScan alias: dst_union22_delta + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (k0 <= 50) type: boolean + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: k1 @@ -390,6 +394,8 @@ STAGE PLANS: expr: k4 type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE Union Select Operator expressions: @@ -531,12 +537,16 @@ STAGE PLANS: null-subquery2:subq-subquery2:a TableScan alias: a + Statistics: + numRows: 500 dataSize: 11124 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (k1 > 20) type: boolean + Statistics: + numRows: 500 dataSize: 11124 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: k1 @@ -545,6 +555,8 @@ STAGE PLANS: Map-reduce partition columns: expr: k1 type: string + Statistics: + numRows: 500 dataSize: 11124 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: k1 @@ -556,12 +568,16 @@ STAGE PLANS: null-subquery2:subq-subquery2:b:dst_union22_delta TableScan alias: dst_union22_delta + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: ((k0 > 50) and (k1 > 20)) type: boolean + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: k1 @@ -571,6 +587,8 @@ STAGE PLANS: expr: k4 type: string outputColumnNames: _col1, _col3, _col4 + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col1 @@ -579,6 +597,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col1 type: string + Statistics: + numRows: 500 dataSize: 16936 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: _col3 diff --git ql/src/test/results/clientpositive/union24.q.out ql/src/test/results/clientpositive/union24.q.out index 580bf0d..d1292f4 100644 --- ql/src/test/results/clientpositive/union24.q.out +++ ql/src/test/results/clientpositive/union24.q.out @@ -65,17 +65,23 @@ STAGE PLANS: null-subquery2:s-subquery2:src5 TableScan alias: src5 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator 
isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key type: string outputColumnNames: key + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Group By Operator aggregations: expr: count(1) @@ -85,6 +91,8 @@ STAGE PLANS: type: string mode: hash outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -93,6 +101,8 @@ STAGE PLANS: Map-reduce partition columns: expr: _col0 type: string + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col1 @@ -155,6 +165,8 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -162,6 +174,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 @@ -187,6 +201,8 @@ STAGE PLANS: TableScan GatherStats: false Union + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -194,6 +210,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -201,6 +219,8 @@ STAGE PLANS: expr: _col1 type: bigint sort order: ++ + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -210,12 +230,16 @@ STAGE PLANS: null-subquery1-subquery1-subquery1:s-subquery1-subquery1-subquery1:src2 TableScan alias: src2 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -223,7 +247,11 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -231,6 +259,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -238,6 +268,8 @@ STAGE PLANS: expr: _col1 type: bigint sort order: ++ + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -247,12 +279,16 @@ STAGE PLANS: null-subquery1-subquery1-subquery2:s-subquery1-subquery1-subquery2:src3 TableScan alias: src3 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: 
expr: key @@ -260,7 +296,11 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -268,6 +308,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -275,6 +317,8 @@ STAGE PLANS: expr: _col1 type: bigint sort order: ++ + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -284,12 +328,16 @@ STAGE PLANS: null-subquery1-subquery2:s-subquery1-subquery2:src4 TableScan alias: src4 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -297,7 +345,11 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: _col0 @@ -305,6 +357,8 @@ STAGE PLANS: expr: _col1 type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -312,6 +366,8 @@ STAGE PLANS: expr: _col1 type: bigint sort order: ++ + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -475,11 +531,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 1236 dataSize: 5928 basicStatsState: COMPLETE colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -592,12 +652,16 @@ STAGE PLANS: null-subquery2:s-subquery2:a TableScan alias: a + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -606,6 +670,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -613,12 +679,16 @@ STAGE PLANS: null-subquery2:s-subquery2:b TableScan alias: b + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -627,6 +697,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + 
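
The Union hunks in union24.q.out are the one place these numbers change rather than pass through: four branches, each annotated numRows: 309 dataSize: 1482, combine to numRows: 1236 dataSize: 5928 on the Union operator, a plain sum. The state appears to be kept only as strong as the weakest input (the all-COMPLETE branches here stay COMPLETE; the all-PARTIAL case in union_ppr.q.out below stays PARTIAL). A sketch of that merge against the StatsAnnotation holder from earlier; the weakest-state rule is inferred from these two files only:

    static StatsAnnotation mergeUnionBranches(java.util.List<StatsAnnotation> branches) {
      long rows = 0;
      long size = 0;
      StatsAnnotation.State basic = StatsAnnotation.State.COMPLETE;
      StatsAnnotation.State col = StatsAnnotation.State.COMPLETE;
      for (StatsAnnotation s : branches) {
        rows += s.getNumRows();   // 4 x 309  = 1236 in union24.q.out
        size += s.getDataSize();  // 4 x 1482 = 5928 in union24.q.out
        if (s.getBasicStatsState().ordinal() < basic.ordinal()) {
          basic = s.getBasicStatsState();
        }
        if (s.getColStatsState().ordinal() < col.ordinal()) {
          col = s.getColStatsState();
        }
      }
      return new StatsAnnotation(rows, size, basic, col);
    }
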
Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE tag: 1 value expressions: expr: count @@ -786,12 +858,16 @@ STAGE PLANS: null-subquery1-subquery1:s-subquery1-subquery1:src2 TableScan alias: src2 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -799,6 +875,8 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union Select Operator expressions: @@ -823,12 +901,16 @@ STAGE PLANS: null-subquery1-subquery2:s-subquery1-subquery2:src3 TableScan alias: src3 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -836,6 +918,8 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union Select Operator expressions: @@ -1079,12 +1163,16 @@ STAGE PLANS: null-subquery2:s-subquery2:a TableScan alias: a + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -1093,6 +1181,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE tag: 0 value expressions: expr: key @@ -1100,12 +1190,16 @@ STAGE PLANS: null-subquery2:s-subquery2:b TableScan alias: b + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Reduce Output Operator key expressions: expr: key @@ -1114,6 +1208,8 @@ STAGE PLANS: Map-reduce partition columns: expr: key type: string + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE tag: 1 Path -> Alias: #### A masked pattern was here #### @@ -1356,12 +1452,16 @@ STAGE PLANS: null-subquery1-subquery1:s-subquery1-subquery1:src2 TableScan alias: src2 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -1369,6 +1469,8 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union Select Operator expressions: @@ -1393,12 +1495,16 @@ STAGE PLANS: null-subquery1-subquery2:s-subquery1-subquery2:src3 TableScan alias: src3 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE GatherStats: false 
Filter Operator isSamplingPred: false predicate: expr: (key < 10) type: boolean + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Select Operator expressions: expr: key @@ -1406,6 +1512,8 @@ STAGE PLANS: expr: count type: bigint outputColumnNames: _col0, _col1 + Statistics: + numRows: 309 dataSize: 1482 basicStatsState: COMPLETE colStatsState: NONE Union Select Operator expressions: diff --git ql/src/test/results/clientpositive/union_ppr.q.out ql/src/test/results/clientpositive/union_ppr.q.out index 82fb35b..b9e1b05 100644 --- ql/src/test/results/clientpositive/union_ppr.q.out +++ ql/src/test/results/clientpositive/union_ppr.q.out @@ -30,12 +30,16 @@ STAGE PLANS: null-subquery1:a-subquery1:x TableScan alias: x + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 100) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -47,7 +51,11 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Union + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -59,6 +67,8 @@ STAGE PLANS: expr: _col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -70,6 +80,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++++ + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -83,12 +95,16 @@ STAGE PLANS: null-subquery2:a-subquery2:y TableScan alias: y + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (key < 100) type: boolean + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: key @@ -100,7 +116,11 @@ STAGE PLANS: expr: hr type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 11624 basicStatsState: PARTIAL colStatsState: NONE Union + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Select Operator expressions: expr: _col0 @@ -112,6 +132,8 @@ STAGE PLANS: expr: _col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE Reduce Output Operator key expressions: expr: _col0 @@ -123,6 +145,8 @@ STAGE PLANS: expr: _col3 type: string sort order: ++++ + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE tag: -1 value expressions: expr: _col0 @@ -226,11 +250,15 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Extract + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat
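
union_ppr.q.out closes the section the same way: two PARTIAL branches of dataSize 11624 merge to 23248 on the Union, with numRows still 0 and the PARTIAL state preserved. Exercising the earlier sketches against exactly those numbers reproduces the Union's annotation line:

    public static void main(String[] args) {
      StatsAnnotation branch = new StatsAnnotation(0, 11624,
          StatsAnnotation.State.PARTIAL, StatsAnnotation.State.NONE);
      StatsAnnotation union =
          mergeUnionBranches(java.util.Arrays.asList(branch, branch));
      // Prints: numRows: 0 dataSize: 23248 basicStatsState: PARTIAL colStatsState: NONE
      // which is the Union line in the union_ppr.q.out hunk above.
      System.out.println(union);
    }
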