diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3a8efcd..8c63f15 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -572,6 +572,9 @@
     HIVESAMPLINGNUMBERFORORDERBY("hive.optimize.sampling.orderby.number", 1000),
     HIVESAMPLINGPERCENTFORORDERBY("hive.optimize.sampling.orderby.percent", 0.1f),

+    // annotate hive operator tree with statistics information
+    HIVE_ANNOTATE_STATS("hive.annotate.stats", false),
+
     // whether to optimize union followed by select followed by filesink
     // It creates sub-directories in the final output, so should not be turned on in systems
     // where MAPREDUCE-1501 is not present
diff --git data/files/alltypes.txt data/files/alltypes.txt
new file mode 100644
index 0000000..594b299
--- /dev/null
+++ data/files/alltypes.txt
@@ -0,0 +1,2 @@
+true|10|100|1000|10000|4.0|20.0|2.2222|1969-12-31 15:59:58.174|1970-01-01 00:00:00|hello|k1:v1,k2:v2|100,200|{10, "foo"}
+false|20|200|2000|20000|8.0|40.0|4.2222|1970-12-31 15:59:58.174|1971-01-01 00:00:00||k3:v3,k4:v4|200,300|{20, "bar"}
diff --git data/files/dept.txt data/files/dept.txt
new file mode 100644
index 0000000..292bee6
--- /dev/null
+++ data/files/dept.txt
@@ -0,0 +1,4 @@
+31|sales
+33|engineering
+34|clerical
+35|marketing
diff --git data/files/emp.txt data/files/emp.txt
new file mode 100644
index 0000000..a0e76b9
--- /dev/null
+++ data/files/emp.txt
@@ -0,0 +1,6 @@
+Rafferty|31
+Jones|33
+Steinberg|33
+Robinson|34
+Smith|34
+John|
diff --git data/files/loc.txt data/files/loc.txt
new file mode 100644
index 0000000..69910b7
--- /dev/null
+++ data/files/loc.txt
@@ -0,0 +1,8 @@
+OH|31|43201|2001
+IO|32|43202|2001
+CA|35|43809|2001
+FL|33|54342|2001
+UT|35||2001
+CA|35|43809|2001
+|34|40000|
+FL|33|54342|2001
diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index bad4f48..27117cd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -408,6 +408,8 @@
   DROP_COMMAND_NOT_ALLOWED_FOR_PARTITION(30011, "Partition protected from being dropped"),
   COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not supported "
       + "for partition columns"),
+
+  STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"),
   ;

   private int errorCode;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index 9fc7afa..9f1c790 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -1612,4 +1613,12 @@ static boolean toString(StringBuilder builder, Set visited, Operator
     }
     return false;
   }
+
+  public Statistics getStatistics() {
+    return getConf().getStatistics();
+  }
+
+  public void setStatistics(Statistics stats) {
+    getConf().setStatistics(stats);
+  }
 }
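The new getStatistics()/setStatistics() accessors simply delegate to the operator descriptor, and the whole optimization stays off unless the new flag is enabled. A minimal sketch of toggling the flag programmatically — the driver class is invented for illustration, but the ConfVar is the one added above:

import org.apache.hadoop.hive.conf.HiveConf;

public class AnnotateStatsToggle {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // defaults to false, so existing query plans are unaffected
    conf.setBoolVar(HiveConf.ConfVars.HIVE_ANNOTATE_STATS, true);
    // Optimizer.initialize() reads the same flag before adding AnnotateWithStatistics
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_ANNOTATE_STATS));
  }
}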
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index f723def..6996d20 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -171,6 +171,7 @@
 import com.esotericsoftware.kryo.io.Input;
 import com.esotericsoftware.kryo.io.Output;
 import com.esotericsoftware.kryo.serializers.FieldSerializer;
+import com.google.common.base.Joiner;

 /**
  * Utilities.
@@ -3077,5 +3078,35 @@
       }
     }
   }
+
+  /**
+   * Returns the fully qualified name of a table column.
+   *
+   * @param dbName database name
+   * @param tabName table name
+   * @param colName column name
+   * @return dot-separated fully qualified column name
+   */
+  public static String getFullyQualifiedColumnName(String dbName, String tabName, String colName) {
+    return getFullyQualifiedName(dbName, tabName, colName);
+  }
+
+  /**
+   * Returns the fully qualified name of a partition column.
+   *
+   * @param dbName database name
+   * @param tabName table name
+   * @param partName partition name
+   * @param colName column name
+   * @return dot-separated fully qualified column name
+   */
+  public static String getFullyQualifiedColumnName(String dbName, String tabName, String partName,
+      String colName) {
+    return getFullyQualifiedName(dbName, tabName, partName, colName);
+  }
+
+  private static String getFullyQualifiedName(String... names) {
+    return Joiner.on(".").skipNulls().join(names);
+  }
 }
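The Joiner-based helper above is what the column statistics cache uses to build its keys. A quick standalone sketch of its behavior (class name and sample values are invented for illustration):

import com.google.common.base.Joiner;

public class FqnSketch {
  public static void main(String[] args) {
    // null segments are skipped rather than rendered as empty strings
    System.out.println(Joiner.on(".").skipNulls().join(new String[] {"default", "emp", "deptid"}));
    // -> default.emp.deptid
    System.out.println(Joiner.on(".").skipNulls().join(new String[] {"default", "emp", null, "deptid"}));
    // -> default.emp.deptid
  }
}

Note the second call: because skipNulls() drops the segment entirely, a null partition name collapses to the same key as the table-level overload.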
diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index b0f124b..e85a7b5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -91,6 +91,8 @@
 import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;

+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;

 /**
@@ -109,6 +111,9 @@
   private HiveConf conf = null;
   private IMetaStoreClient metaStoreClient;

+  // cache column statistics to avoid multiple round trips to the metastore
+  private Map<String, ColumnStatistics> colStatsCache = Maps.newHashMap();
+
   private static ThreadLocal<Hive> hiveDB = new ThreadLocal<Hive>() {
     @Override
     protected synchronized Hive initialValue() {
@@ -280,6 +285,10 @@ public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownD
    */
   public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb,
       boolean cascade) throws HiveException, NoSuchObjectException {
+
+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     try {
       getMSC().dropDatabase(name, deleteData, ignoreUnknownDb, cascade);
     } catch (NoSuchObjectException e) {
@@ -378,6 +387,10 @@ public void createTable(String tableName, List columns,
    */
   public void alterTable(String tblName, Table newTbl)
       throws InvalidOperationException, HiveException {
+
+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     Table t = newTable(tblName);
     try {
       // Remove the DDL_TIME so it gets refreshed
@@ -446,6 +459,10 @@ public void alterPartition(String tblName, Partition newPart)
    */
   public void alterPartition(String dbName, String tblName, Partition newPart)
       throws InvalidOperationException, HiveException {
+
+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     try {
       // Remove the DDL time so that it gets refreshed
       if (newPart.getParameters() != null) {
@@ -473,6 +490,10 @@ public void alterPartition(String dbName, String tblName, Partition newPart)
    */
   public void alterPartitions(String tblName, List<Partition> newParts)
       throws InvalidOperationException, HiveException {
+
+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     Table t = newTable(tblName);
     List<Partition> newTParts = new ArrayList<Partition>();
@@ -540,6 +561,10 @@ public void renamePartition(Table tbl, Map oldPartSpec, Partitio
   public void alterDatabase(String dbName, Database db)
       throws HiveException {
+
+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     try {
       getMSC().alterDatabase(dbName, db);
     } catch (MetaException e) {
@@ -867,6 +892,9 @@ public void dropTable(String dbName, String tableName) throws HiveException {
   public void dropTable(String dbName, String tableName, boolean deleteData,
       boolean ignoreUnknownTab) throws HiveException {

+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     try {
       getMSC().dropTable(dbName, tableName, deleteData, ignoreUnknownTab);
     } catch (NoSuchObjectException e) {
@@ -1652,6 +1680,10 @@ public boolean dropPartition(String tblName, List part_vals, boolean del
   public boolean dropPartition(String db_name, String tbl_name,
       List<String> part_vals, boolean deleteData) throws HiveException {
+
+    // invalidate column stats cache
+    resetColumnStatsCache();
+
     try {
       return getMSC().dropPartition(db_name, tbl_name, part_vals, deleteData);
     } catch (NoSuchObjectException e) {
@@ -2484,26 +2516,83 @@ public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws
     }
   }

+  public List<ColumnStatistics> getTableColumnStatistics(String tableName) throws HiveException {
+    return getTableColumnStatistics("default", tableName);
+  }
+
+  public List<ColumnStatistics> getTableColumnStatistics(String dbName, String tableName)
+      throws HiveException {
+    Table table = getTable(dbName, tableName);
+    List<FieldSchema> cols = table.getAllCols();
+    List<String> colNames = Utilities.getColumnNamesFromFieldSchema(cols);
+    return getTableColumnStatistics(dbName, tableName, colNames);
+  }
+
+  public List<ColumnStatistics> getTableColumnStatistics(String dbName, String tableName,
+      List<String> colNames) throws HiveException {
+    List<ColumnStatistics> colStats = Lists.newArrayList();
+    for (String colName : colNames) {
+      colStats.add(getTableColumnStatistics(dbName, tableName, colName));
+    }
+    return colStats;
+  }
+
   public ColumnStatistics getTableColumnStatistics(String dbName, String tableName,
       String colName) throws HiveException {
-    try {
-      return getMSC().getTableColumnStatistics(dbName, tableName, colName);
-    } catch (Exception e) {
-      LOG.error(StringUtils.stringifyException(e));
-      throw new HiveException(e);
+    String key = Utilities.getFullyQualifiedColumnName(dbName, tableName, colName);
+    if (colStatsCache.containsKey(key)) {
+      return colStatsCache.get(key);
+    } else {
+      try {
+        ColumnStatistics cs = getMSC().getTableColumnStatistics(dbName, tableName, colName);
+        colStatsCache.put(key, cs);
+        return cs;
+      } catch (Exception e) {
+        LOG.error(StringUtils.stringifyException(e));
+        throw new HiveException(e);
+      }
     }
+  }

+  public List<ColumnStatistics> getPartitionColumnStatistics(String tableName, String partName)
+      throws HiveException {
+    return getPartitionColumnStatistics("default", tableName, partName);
+  }
+
+  public List<ColumnStatistics> getPartitionColumnStatistics(String dbName, String tableName,
+      String partName) throws HiveException {
+    Table table = getTable(dbName, tableName);
+    List<FieldSchema> cols = table.getAllCols();
+    List<String> colNames = Utilities.getColumnNamesFromFieldSchema(cols);
+    return getPartitionColumnStatistics(dbName, tableName, partName, colNames);
+  }
+
+  public List<ColumnStatistics> getPartitionColumnStatistics(String dbName, String tableName,
+      String partName, List<String> colNames) throws HiveException {
+    List<ColumnStatistics> colStats = Lists.newArrayList();
+    for (String colName : colNames) {
+      colStats.add(getPartitionColumnStatistics(dbName, tableName, partName, colName));
+    }
+    return colStats;
   }

   public ColumnStatistics getPartitionColumnStatistics(String dbName, String tableName,
-      String partName, String colName) throws HiveException {
+      String partName, String colName) throws HiveException {
+    String key = Utilities.getFullyQualifiedColumnName(dbName, tableName, partName, colName);
+    if (colStatsCache.containsKey(key)) {
+      return colStatsCache.get(key);
+    } else {
       try {
-        return getMSC().getPartitionColumnStatistics(dbName, tableName, partName, colName);
+        ColumnStatistics cs = getMSC().getPartitionColumnStatistics(dbName, tableName, partName,
+            colName);
+        colStatsCache.put(key, cs);
+        return cs;
       } catch (Exception e) {
         LOG.error(StringUtils.stringifyException(e));
         throw new HiveException(e);
       }
     }
+  }

   public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName)
       throws HiveException {
@@ -2565,4 +2654,10 @@ public void cancelDelegationToken(String tokenStrForm)
   private static String[] getQualifiedNames(String qualifiedName) {
     return qualifiedName.split("\\.");
   }
+
+  public void resetColumnStatsCache() {
+    if (colStatsCache != null) {
+      colStatsCache.clear();
+    }
+  }
 }
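The caching above is a straightforward cache-aside keyed by the fully qualified column name, with coarse invalidation on any DDL. A minimal standalone sketch of the same pattern — the class and fetchFromMetastore are stand-ins invented for illustration; the real code delegates to getMSC():

import java.util.HashMap;
import java.util.Map;

public class ColStatsCacheSketch {
  private final Map<String, String> cache = new HashMap<String, String>();

  // stand-in for the metastore round trip
  private String fetchFromMetastore(String key) {
    System.out.println("round trip for " + key);
    return "stats(" + key + ")";
  }

  public String get(String dbName, String tabName, String colName) {
    String key = dbName + "." + tabName + "." + colName;
    if (cache.containsKey(key)) {
      return cache.get(key); // served locally, no metastore call
    }
    String stats = fetchFromMetastore(key);
    cache.put(key, stats);
    return stats;
  }

  public static void main(String[] args) {
    ColStatsCacheSketch c = new ColStatsCacheSketch();
    c.get("default", "emp", "deptid"); // prints the round-trip message
    c.get("default", "emp", "deptid"); // silent: cache hit
  }
}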
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index 3a76bfc..e956435 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner;
 import org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -111,6 +112,9 @@ public void initialize(HiveConf hiveConf) {
     if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) {
       transformations.add(new LimitPushdownOptimizer());
     }
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_ANNOTATE_STATS)) {
+      transformations.add(new AnnotateWithStatistics());
+    }
     transformations.add(new SimpleFetchOptimizer()); // must be called last

     if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
index 51bef04..12bb94c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java
@@ -99,6 +99,8 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
       SessionState.get().getLineageState().setIndex(lCtx.getIndex());
     }

+    // set the lineage index in the parse context
+    pctx.setLineageIndex(lCtx.getIndex());
     return pctx;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java
new file mode 100644
index 0000000..181c12d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateStatsProcCtx.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+
+public class AnnotateStatsProcCtx implements NodeProcessorCtx {
+
+  private ParseContext pctx;
+  private HiveConf conf;
+  private Statistics andExprStats = null;
+
+  public AnnotateStatsProcCtx(ParseContext pctx) {
+    this.setParseContext(pctx);
+    if (pctx != null) {
+      this.setConf(pctx.getConf());
+    } else {
+      this.setConf(null);
+    }
+  }
+
+  public HiveConf getConf() {
+    return conf;
+  }
+
+  public void setConf(HiveConf conf) {
+    this.conf = conf;
+  }
+
+  public ParseContext getParseContext() {
+    return pctx;
+  }
+
+  public void setParseContext(ParseContext pctx) {
+    this.pctx = pctx;
+  }
+
+  public Statistics getAndExprStats() {
+    return andExprStats;
+  }
+
+  public void setAndExprStats(Statistics andExprStats) {
+    this.andExprStats = andExprStats;
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
new file mode 100644
index 0000000..057c4b2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.DemuxOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.Transform;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+public class AnnotateWithStatistics implements Transform {
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    AnnotateStatsProcCtx aspCtx = new AnnotateStatsProcCtx(pctx);
+
+    // create a walker which walks the tree in a DFS manner while maintaining the
+    // operator stack. The dispatcher generates the plan from the operator tree
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getTableScanRule());
+    opRules.put(new RuleRegExp("SEL", SelectOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getSelectRule());
+    opRules.put(new RuleRegExp("FIL", FilterOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getFilterRule());
+    opRules.put(new RuleRegExp("GBY", GroupByOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getGroupByRule());
+    opRules.put(new RuleRegExp("JOIN", CommonJoinOperator.getOperatorName() + "%|"
+        + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
+    opRules.put(new RuleRegExp("DMX", DemuxOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getDemuxRule());
+    opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getLimitRule());
+    opRules.put(new RuleRegExp("PTF", PTFOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getPTFRule());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(StatsRulesProcFactory.getDefaultRule(),
+        opRules, aspCtx);
+    GraphWalker ogw = new PreOrderWalker(disp);
+
+    // Create a list of top op nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+
+    return pctx;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
new file mode 100644
index 0000000..c3e7ed2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -0,0 +1,983 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.stats.annotation;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.DemuxOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+
+public class StatsRulesProcFactory {
+
+  public static class TableScanStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      TableScanOperator tsop = (TableScanOperator) nd;
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      PrunedPartitionList partList = null;
+      try {
+        partList = aspCtx.getParseContext().getPrunedPartitions(tsop.getName(), tsop);
+      } catch (HiveException e1) {
+        throw new SemanticException(e1);
+      }
+      Table table = aspCtx.getParseContext().getTopToTable().get(tsop);
+
+      // gather statistics for the first time and then attach them to the table scan operator
+      Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop);
+      try {
+        tsop.setStatistics(stats.clone());
+      } catch (CloneNotSupportedException e) {
+        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+      }
+      return null;
+    }
+  }
+
+  public static class SelectStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    // Projection doesn't change the number of rows emitted from the parent
+    // operator. It changes the size of each tuple emitted. In the typical case,
+    // where only a subset of columns is selected, the average row size will
+    // shrink since some of the columns are pruned. In order to accurately
+    // compute the average row size, column-level statistics are required.
+    // Column-level statistics store the average size of the values in a column,
+    // which can be used to more reliably estimate the reduction in the size of
+    // each tuple.
+
+    // For more information, refer to the 'Estimating The Cost Of Operations' chapter
+    // in "Database Systems: The Complete Book" by Garcia-Molina et al.
+
+    // In the absence of column-level statistics we can only estimate the size of
+    // primitive data types. We cannot estimate the size of variable-length or
+    // complex types, so we skip applying the rules when column statistics are absent.
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      SelectOperator sop = (SelectOperator) nd;
+      Operator<? extends OperatorDesc> parent = sop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+
+      // do not proceed if parent statistics are not available
+      if (parentStats != null) {
+        TableScanOperator tsop = StatsUtils.getRoots(sop).get(0);
+        Table table = aspCtx.getParseContext().getTopToTable().get(tsop);
+        String dbName = table.getDbName();
+        String tableName = table.getTableName();
+        Map<String, PrunedPartitionList> partList = null;
+
+        if (table.isPartitioned()) {
+          partList = aspCtx.getParseContext().getPrunedPartitions();
+        }
+
+        // PTFOperator and SELECT operator have the same rule. A PTFOperator will have SELECT
+        // as its child; to prevent applying the same rules twice, we return here. SELECT (*)
+        // also does not change the statistics.
+        // In both these cases we can pass on the parent statistics.
+        if (sop.getConf().isSelectStar() || parent instanceof PTFOperator) {
+          try {
+            sop.setStatistics(parent.getStatistics().clone());
+          } catch (CloneNotSupportedException e) {
+            throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+          }
+          return null;
+        }
+
+        // get lineage information
+        Index lIndex = aspCtx.getParseContext().getLineageIndex();
+        List<ExprNodeDesc> inCols = sop.getConf().getColList();
+        List<String> outCols = sop.getConf().getOutputColumnNames();
+
+        Statistics result = StatsUtils.applySelectRule(dbName, tableName, sop, aspCtx, partList,
+            parentStats, inCols, outCols, lIndex);
+        sop.setStatistics(result);
+      }
+      return null;
+    }
+
+  }
+
+  public static class PTFStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    // The PTF operator doesn't change the number of input rows; however, it adds a new
+    // column for each aggregation. The rules below are exactly the same as for the
+    // SELECT operator. A PTF operator will have a SEL operator as its child, and the
+    // code below matches the SELECT operator's; the SELECT operator acts as a
+    // pass-through when its parent is a PTF operator.
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      PTFOperator pop = (PTFOperator) nd;
+      SelectOperator child = (SelectOperator) pop.getChildOperators().get(0);
+      Operator<? extends OperatorDesc> parent = pop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+
+      // do not proceed if parent statistics are not available
+      if (parentStats != null) {
+        TableScanOperator tsop = StatsUtils.getRoots(pop).get(0);
+        Table table = aspCtx.getParseContext().getTopToTable().get(tsop);
+        String dbName = table.getDbName();
+        String tableName = table.getTableName();
+        Map<String, PrunedPartitionList> partList = null;
+
+        if (table.isPartitioned()) {
+          partList = aspCtx.getParseContext().getPrunedPartitions();
+        }
+
+        // get lineage information
+        Index lIndex = aspCtx.getParseContext().getLineageIndex();
+        List<ExprNodeDesc> inCols = child.getConf().getColList();
+        List<String> outCols = child.getConf().getOutputColumnNames();
+
+        Statistics result = StatsUtils.applySelectRule(dbName, tableName, pop, aspCtx, partList,
+            parentStats, inCols, outCols, lIndex);
+        pop.setStatistics(result);
+      }
+      return null;
+    }
+
+  }
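To make the size arithmetic behind the SELECT/PTF rules concrete, here is a rough standalone sketch with invented numbers (the column widths and row count are illustrative; the real values come from column statistics in the metastore):

public class SelectSizeSketch {
  public static void main(String[] args) {
    long numRows = 1000L;          // unchanged by projection
    double inAvgRowSize = 100.0;   // parent emits 100-byte tuples
    double outAvgRowSize = 40.0;   // only narrow columns survive the projection

    // projection keeps the row count but shrinks the data size proportionally
    long inDataSize = (long) (numRows * inAvgRowSize);
    long outDataSize = (long) (numRows * outAvgRowSize);
    System.out.println(inDataSize + " -> " + outDataSize); // 100000 -> 40000
  }
}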
+
+  public static class FilterStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    // The Filter operator doesn't change the average row size, but it does change
+    // the number of rows emitted. The reduction in the number of rows emitted
+    // depends on the filter expression.
+
+    // Notations:
+    // T(S) - Number of tuples in relation S
+    // V(S,A) - Number of distinct values of attribute A in relation S
+
+    // Rules:
+    // 1 - Column equals a constant
+    // T(S) = T(R) / V(R,A)
+
+    // 2 - Inequality conditions
+    // T(S) = T(R) / 3
+
+    // 3 - Not-equals comparison
+    // T(S) = T(R)
+    // (or)
+    // T(S) = T(R) * (V(R,A) - 1) / V(R,A)
+
+    // 4 - NOT condition
+    // T(S) = T(R) - T(S'), where S' is the set of rows satisfying the negated condition
+
+    // 5 - Multiple AND conditions
+    // Apply rules 1 to 3 in cascade (the order doesn't matter)
+
+    // 6 - Multiple OR conditions
+    // The simple case is to evaluate the conditions independently and sum the results
+    // T(S) = m1 + m2
+    // (or)
+    // T(S) = T(R) * (1 - (1 - m1/T(R)) * (1 - m2/T(R)))
+    // where m1 is the number of tuples that satisfy condition1 and
+    // m2 is the number of tuples that satisfy condition2
+
+    // For more information, refer to the 'Estimating The Cost Of Operations' chapter
+    // in "Database Systems: The Complete Book" by Garcia-Molina et al.
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      FilterOperator fop = (FilterOperator) nd;
+      Operator<? extends OperatorDesc> parent = fop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+
+      if (parentStats != null) {
+        TableScanOperator tsop = StatsUtils.getRoots(fop).get(0);
+        Table table = aspCtx.getParseContext().getTopToTable().get(tsop);
+        String dbName = table.getDbName();
+        String tableName = table.getTableName();
+
+        try {
+          // proceed only if the parent statistics are available and complete
+          if (parentStats.getBasicStatsState().equals(Statistics.State.COMPLETE) &&
+              parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+            ExprNodeDesc root = fop.getConf().getPredicate();
+            long newRowCount = evaluateExpression(parent.getStatistics(), dbName, tableName, root,
+                aspCtx);
+            Statistics st = parentStats.clone();
+            StatsUtils.updateStats(st, newRowCount);
+            fop.setStatistics(st);
+          } else {
+            // if basic stats are not complete then pass on the stats from the parent without
+            // applying any rule
+            fop.setStatistics(parentStats.clone());
+          }
+        } catch (CloneNotSupportedException e) {
+          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        }
+        aspCtx.setAndExprStats(null);
+      }
+      return null;
+    }
+
+    private long evaluateExpression(Statistics stats, String dbName,
+        String tabName, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx) throws SemanticException {
+      long newNumRows = 0;
+      Statistics andStats = null;
+      if (pred instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred;
+        GenericUDF udf = genFunc.getGenericUDF();
+
+        // for an AND condition, update the stats in cascade
+        if (udf instanceof GenericUDFOPAnd) {
+          try {
+            andStats = stats.clone();
+            aspCtx.setAndExprStats(andStats);
+          } catch (CloneNotSupportedException e) {
+            throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+          }
+
+          // evaluate the children
+          for (ExprNodeDesc child : genFunc.getChildren()) {
+            newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), dbName, tabName, child,
+                aspCtx);
+            StatsUtils.updateStats(aspCtx.getAndExprStats(), newNumRows);
+          }
+        } else {
+
+          // for an OR condition, compute the estimates independently and sum them
+          if (udf instanceof GenericUDFOPOr) {
+            for (ExprNodeDesc child : genFunc.getChildren()) {
+              newNumRows += evaluateChildExpr(stats, dbName, tabName, child, aspCtx);
+            }
+          } else if (udf instanceof GenericUDFOPNot) {
+            newNumRows = evaluateNotExpr(stats, dbName, tabName, pred, aspCtx);
+          } else if (udf instanceof GenericUDFOPNotNull) {
+            newNumRows = evaluateColEqualsNullExpr(stats, dbName, tabName, pred, aspCtx);
+            newNumRows = stats.getNumRows() - newNumRows;
+          } else if (udf instanceof GenericUDFOPNull) {
+            newNumRows = evaluateColEqualsNullExpr(stats, dbName, tabName, pred, aspCtx);
+          } else {
+
+            // single predicate condition
+            newNumRows = evaluateChildExpr(stats, dbName, tabName, pred, aspCtx);
+          }
+        }
+      }
+
+      return newNumRows;
+    }
+
+    private long evaluateNotExpr(Statistics stats, String dbName,
+        String tabName, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx) throws SemanticException {
+
+      long numRows = stats.getNumRows();
+
+      if (pred instanceof ExprNodeGenericFuncDesc) {
+        ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred;
+        for (ExprNodeDesc leaf : genFunc.getChildren()) {
+          if (leaf instanceof ExprNodeGenericFuncDesc) {
+
+            // GenericUDF: estimate the rows satisfying the negated condition and
+            // subtract them from the total (rule 4)
+            long newNumRows = 0;
+            for (ExprNodeDesc child : ((ExprNodeGenericFuncDesc) pred).getChildren()) {
+              newNumRows = evaluateChildExpr(stats, dbName, tabName, child, aspCtx);
+            }
+            return numRows - newNumRows;
+          } else if (leaf instanceof ExprNodeConstantDesc) {
+            ExprNodeConstantDesc encd = (ExprNodeConstantDesc) leaf;
+            if (encd.getValue().equals(true)) {
+              // NOT(true) filters out all rows
+              return 0;
+            } else {
+              // NOT(false) passes all rows
+              return numRows;
+            }
+          } else {
+            // NOT directly on a column is not possible
+          }
+        }
+      }
+      return 0;
+    }
+
+    private long evaluateColEqualsNullExpr(Statistics stats, String dbName, String tabName,
+        ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx) {
+
+      long numRows = stats.getNumRows();
+
+      // evaluate similarly to a "col = constant" expr
+      if (pred instanceof ExprNodeGenericFuncDesc) {
+
+        ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred;
+        for (ExprNodeDesc leaf : genFunc.getChildren()) {
+
+          if (leaf instanceof ExprNodeColumnDesc) {
+            ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
+            String colName = colDesc.getColumn();
+            long dvs = StatsUtils.getDistinctCountOfColumn(stats, dbName, tabName, colName);
+            if (dvs != 0) {
+              return numRows / dvs;
+            } else {
+              return numRows;
+            }
+          }
+        }
+      }
+
+      // worst case
+      return numRows;
+    }
+
+    private long evaluateChildExpr(Statistics stats, String dbName,
+        String tabName, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx) throws SemanticException {
+
+      long numRows = stats.getNumRows();
+
+      if (child instanceof ExprNodeGenericFuncDesc) {
+
+        ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) child;
+        GenericUDF udf = genFunc.getGenericUDF();
+
+        if (udf instanceof GenericUDFOPEqual || udf instanceof GenericUDFOPEqualNS) {
+          String colName = null;
+          boolean isConst = false;
+
+          for (ExprNodeDesc leaf : genFunc.getChildren()) {
+            if (leaf instanceof ExprNodeConstantDesc) {
+
+              // if the first argument is the constant then just set the flag and continue
+              if (colName == null) {
+                isConst = true;
+                continue;
+              }
+              long dvs = StatsUtils.getDistinctCountOfColumn(stats, dbName, tabName, colName);
+              if (dvs != 0) {
+                return numRows / dvs;
+              } else {
+                return numRows;
+              }
+
+            } else if (leaf instanceof ExprNodeColumnDesc) {
+              ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
+              colName = colDesc.getColumn();
+
+              // if the constant is the first argument then evaluate the result
+              if (isConst) {
+                long dvs = StatsUtils.getDistinctCountOfColumn(stats, dbName, tabName, colName);
+                if (dvs != 0) {
+                  return numRows / dvs;
+                } else {
+                  return numRows;
+                }
+              }
+            }
+          }
+        } else if (udf instanceof GenericUDFOPNotEqual) {
+          return numRows;
+        } else if (udf instanceof GenericUDFOPEqualOrGreaterThan ||
+            udf instanceof GenericUDFOPEqualOrLessThan ||
+            udf instanceof GenericUDFOPGreaterThan ||
+            udf instanceof GenericUDFOPLessThan) {
+          return numRows / 3;
+        } else {
+          return evaluateExpression(stats, dbName, tabName, genFunc, aspCtx);
+        }
+      }
+
+      // worst case
+      return numRows;
+    }
+
+  }
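As a quick sanity check of the selectivity rules above, here is a standalone sketch with invented statistics — T(R) = 1000 rows and V(R,deptid) = 40 distinct values are illustrative numbers, not from the patch:

public class FilterSelectivitySketch {
  public static void main(String[] args) {
    long numRows = 1000L;       // T(R)
    long distinctDeptIds = 40L; // V(R, deptid)

    // rule 1: deptid = 33  ->  T(R) / V(R,A)
    long eq = numRows / distinctDeptIds; // 25

    // rule 2: deptid < 33  ->  T(R) / 3
    long lt = numRows / 3; // 333

    // rule 6 (simple case): deptid = 33 OR deptid < 33 -> sum of independent estimates
    long or = eq + lt; // 358

    // rule 4: NOT (deptid = 33) -> T(R) - T(S')
    long not = numRows - eq; // 975

    System.out.println(eq + " " + lt + " " + or + " " + not);
  }
}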
+
+  public static class GroupByStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    // The Group By operator changes the number of rows. The number of rows emitted
+    // by the GBY operator will be at least 1 and at most T(R), the number of rows in
+    // relation R. A better estimate can be found if we have column statistics
+    // on the columns that we are grouping on.
+    // Suppose we are grouping by attributes A, B and C, and statistics for
+    // those columns are available; then a better estimate is the smaller of
+    // V(R,A) * V(R,B) * V(R,C) (the product of the distinct counts of A, B and C)
+    // and T(R)/2.
+    // For more information, refer to the 'Estimating The Cost Of Operations' chapter
+    // in "Database Systems: The Complete Book" by Garcia-Molina et al.
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      GroupByOperator gop = (GroupByOperator) nd;
+      Operator<? extends OperatorDesc> parent = gop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats != null) {
+        TableScanOperator tsop = StatsUtils.getRoots(gop).get(0);
+        Table table = aspCtx.getParseContext().getTopToTable().get(tsop);
+        String dbName = table.getDbName();
+        String tableName = table.getTableName();
+        try {
+          if (parentStats.getBasicStatsState().equals(Statistics.State.COMPLETE)) {
+            List<String> attrs = getGroupByAttributes(gop, parentStats, aspCtx, gop.getConf()
+                .getKeys());
+            long dvProd = 1;
+
+            // get the grouping attributes and compute the product of their distinct cardinalities
+            for (String attr : attrs) {
+              dvProd *= StatsUtils.getDistinctCountOfColumn(parentStats, dbName, tableName, attr);
+            }
+
+            // a reducer-side GBY with GROUPING_SETS will reduce the number of rows
+            // because of aggregation. For example, for GBY(A,B) WITH CUBE the mapper emits
+            // 4 rows for each input row. The reducer-side GBY aggregates these
+            // rows, thereby decreasing the number of rows. The number of rows
+            // can be estimated as follows:
+            // T(R) = min(T(R)/2, T(R, GBY(A,B)) + T(R, GBY(A)) + T(R, GBY(B)) + 1)
+            if (gop.getParentOperators().get(0) instanceof ReduceSinkOperator) {
+
+              // check if the map-side GBY has a grouping set defined
+              if (isParentGBYContainsGroupingSet(gop)) {
+                dvProd = 1;
+
+                // grouping sets are encoded as integers; the position of a set bit
+                // in the integer corresponds to the grouping level.
+                // Ex: For grouping attributes A, B, C:
+                // if the grouping set integer is 3, the grouping level will be
+                // (A, *, *), corresponding to set-bit positions 011
+                Set<Integer> gs = getGroupingSet(gop);
+
+                if (gs != null) {
+                  List<Long> dvs = Lists.newArrayList();
+                  attrs = getGroupByAttributes(gop, parentStats, aspCtx, gop.getConf()
+                      .getKeys());
+
+                  // create the list of distinct counts corresponding to the grouping attrs
+                  for (String dim : attrs) {
+                    dvs.add(StatsUtils
+                        .getDistinctCountOfColumn(parentStats, dbName, tableName, dim));
+                  }
+
+                  for (Integer gsIdx : gs) {
+                    // based on the grouping set integer, compute the product
+                    // of distinct counts, which corresponds to the expected number
+                    // of rows
+                    dvProd += getRowCountsForGroupingSet(gsIdx, dvs);
+
+                    // if the estimated number of rows already exceeds half the number of rows,
+                    // we can exit early to avoid expensive computation
+                    if (dvProd > (parentStats.getNumRows() / 2)) {
+                      break;
+                    }
+                  }
+                }
+
+                Statistics stats = parentStats.clone();
+                long newNumRows = applyGBYRule(stats.getNumRows(), dvProd);
+                StatsUtils.updateStats(stats, newNumRows);
+                gop.setStatistics(stats);
+              } else {
+
+                // the parent doesn't contain grouping sets. since this is the reducer side,
+                // we don't have to apply the GBY rule again — it was already applied
+                // on the map side
+                gop.setStatistics(parentStats.clone());
+                return null;
+              }
+            }
+
+            // apply the GBY rule on the map side
+            Statistics stats = parentStats.clone();
+            long newNumRows = applyGBYRule(stats.getNumRows(), dvProd);
+
+            // if a grouping set is present then this is a CUBE/ROLLUP/GROUPING_SET
+            // operation. In any of these cases, each row will be duplicated once per
+            // element of the grouping set. For example, GBY(A,B,C) WITH CUBE will
+            // have GROUPING_SET(0, 1, 2, 3, 4, 5, 6, 7), which implies that each row
+            // will be duplicated 8 times for the different combinations of aggregations.
+            // The number of rows / data size will therefore be increased by a factor of the
+            // number of elements in the grouping set.
+            if (gop.getConf().isGroupingSetsPresent()) {
+              int multiplier = gop.getConf().getListGroupingSets().size();
+              newNumRows = multiplier * newNumRows;
+            }
+
+            StatsUtils.updateStats(stats, newNumRows);
+            gop.setStatistics(stats);
+          } else {
+
+            // if basic stats are not complete then pass on the stats from the parent without
+            // applying any rule
+            gop.setStatistics(parentStats.clone());
+          }
+        } catch (CloneNotSupportedException e) {
+          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        }
+      }
+      return null;
+    }
+
+    private Set<Integer> getGroupingSet(Operator<? extends OperatorDesc> currOp) {
+      Set<Integer> result = Sets.newHashSet();
+      getGroupingSetImpl(currOp, result);
+      return result;
+    }
+
+    private void getGroupingSetImpl(Operator<? extends OperatorDesc> currOp, Set<Integer> gs) {
+      if (currOp == null) {
+        return;
+      }
+      for (Operator<? extends OperatorDesc> op : currOp.getParentOperators()) {
+        if (op instanceof GroupByOperator) {
+          GroupByOperator gby = (GroupByOperator) op;
+          if (gby.getConf().isGroupingSetsPresent()) {
+            gs.addAll(gby.getConf().getListGroupingSets());
+          }
+        } else {
+          getGroupingSetImpl(op, gs);
+        }
+      }
+    }
+
+    private boolean isParentGBYContainsGroupingSet(Operator<? extends OperatorDesc> currOp) {
+      if (currOp == null) {
+        return false;
+      }
+      for (Operator<? extends OperatorDesc> op : currOp.getParentOperators()) {
+        if (op instanceof GroupByOperator) {
+          GroupByOperator gby = (GroupByOperator) op;
+          if (gby.getConf().isGroupingSetsPresent()) {
+            return true;
+          }
+        } else {
+          return isParentGBYContainsGroupingSet(op);
+        }
+      }
+      return false;
+    }
+
+    private long applyGBYRule(long numRows, long dvProd) {
+      long newNumRows = numRows;
+      // avoid numRows/2 becoming 0
+      if (numRows > 1) {
+        if (dvProd != 0) {
+          newNumRows = Math.min(numRows / 2, dvProd);
+        } else {
+          newNumRows = numRows / 2;
+        }
+      }
+      return newNumRows;
+    }
+
+    private long getRowCountsForGroupingSet(Integer gsIdx, List<Long> dvs) {
+      int val = gsIdx.intValue();
+      int idx = 0;
+      int size = dvs.size();
+      List<Long> cloneDVS = Lists.newArrayList();
+      cloneDVS.addAll(dvs);
+
+      // set 1 for the distinct values in the aggregated positions:
+      // if the aggregation is (*, _col1_, *), the distinct-value vector
+      // will be (1, DV(_col1), 1)
+      while (val != 0) {
+        if ((val & 1) == 1) {
+          // set in the reverse order
+          cloneDVS.set(size - idx - 1, 1L);
+        }
+        val = val >>> 1;
+        idx++;
+      }
+
+      long result = 1;
+      for (Long l : cloneDVS) {
+        result *= l;
+      }
+      return result;
+    }
+
+    private List<String> getGroupByAttributes(GroupByOperator gop, Statistics stats,
+        AnnotateStatsProcCtx aspCtx,
+        ArrayList<ExprNodeDesc> keys) throws SemanticException {
+      List<String> attrs = Lists.newArrayList();
+      Index lIndex = aspCtx.getParseContext().getLineageIndex();
+      for (ExprNodeDesc end : keys) {
+        if (end instanceof ExprNodeColumnDesc) {
+          ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
+          String colName = encd.getColumn();
+          String tabAlias = null;
+          if ((colName.startsWith("_col") || colName.startsWith("KEY._col"))) {
+            if (colName.startsWith("KEY._col")) {
+              // strip off "KEY." from the column name
+              colName = colName.split("\\.")[1];
+            }
+
+            RowResolver rr = aspCtx.getParseContext().getOpParseCtx().get(gop).getRowResolver();
+
+            // internal name: we need to use the inverse row resolver to get the actual name
+            if (colName.startsWith("_")) {
+
+              // the value field of the inverse resolve map is a String[] with 2 values: the 1st
+              // value is the table alias and the 2nd value is the column alias
+              tabAlias = rr.getInvRslvMap().get(colName)[0];
+              colName = rr.getInvRslvMap().get(colName)[1];
+            } else {
+              colName = encd.getColumn();
+            }
+
+            ColumnInfo ci = rr.get(tabAlias, colName);
+            Dependency dep = lIndex.getDependency(gop, ci);
+            if (dep != null) {
+
+              // we can ignore columns that have no reference to base columns.
+              // GBY adds an additional KEY column which can be ignored
+              if (dep.getBaseCols() != null && dep.getBaseCols().size() > 0) {
+                colName = dep.getBaseCols().get(0).getColumn().getName();
+                attrs.add(colName);
+              }
+            } else {
+              attrs.add(colName);
+            }
+          } else {
+            attrs.add(colName);
+          }
+        }
+      }
+      return attrs;
+    }
+  }
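A rough standalone check of the plain (non-grouping-set) GBY estimate — min(T(R)/2, product of distinct counts) — using invented counts; the applyGBYRule body mirrors the helper above:

public class GroupByEstimateSketch {
  // mirrors applyGBYRule in GroupByStatsRule
  static long applyGBYRule(long numRows, long dvProd) {
    long newNumRows = numRows;
    if (numRows > 1) {
      newNumRows = (dvProd != 0) ? Math.min(numRows / 2, dvProd) : numRows / 2;
    }
    return newNumRows;
  }

  public static void main(String[] args) {
    long numRows = 1000L;   // T(R), invented
    long dvProd = 4L * 8L;  // e.g. V(R,state) * V(R,year), also invented
    System.out.println(applyGBYRule(numRows, dvProd)); // 32: the distinct product wins
    System.out.println(applyGBYRule(numRows, 0L));     // 500: no column stats, fall back to T(R)/2
  }
}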
+
+  public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    // There are three cases:
+    // 1 - The values of the join keys are disjoint in the two relations, in which case
+    //     T(RXS) = 0 (we need histograms for this)
+    // 2 - The join key is a primary key on relation R and a foreign key on relation S,
+    //     in which case every tuple in S will have a matching tuple in R:
+    //     T(RXS) = T(S) (we need histograms for this)
+    // 3 - Both R and S have the same value everywhere for the join key, e.g. a boolean
+    //     column that is true in every row:
+    //     T(RXS) = T(R) * T(S) (we need histograms for this; countDistinct = 1 and same value)
+
+    // Since we don't know how the values of the join columns relate, we use the
+    // following general case:
+    // T(RXS) = (T(R) * T(S)) / max(V(R,Y), V(S,Y)) where Y is the join attribute
+
+    // In case of a join on multiple attributes:
+    // T(RXS) = T(R) * T(S) / (max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)))
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd;
+      Operator<? extends OperatorDesc> p = jop.getParentOperators().get(0);
+      Statistics parentStats = p.getStatistics();
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+
+      try {
+        if (parentStats != null &&
+            parentStats.getBasicStatsState().equals(Statistics.State.COMPLETE) &&
+            parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+          long prodRows = 1;
+          List<Long> distinctVals = Lists.newArrayList();
+          boolean multiAttr = false;
+
+          // get the join keys from the parent ReduceSink operators
+          List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators();
+          for (int pos = 0; pos < parents.size(); pos++) {
+            ReduceSinkOperator parent = null;
+
+            if (parents.get(pos) instanceof ReduceSinkOperator) {
+              parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
+            } else {
+
+              // must be a DEMUX operator; the RS will be the grandparent
+              parent = (ReduceSinkOperator) jop.getParentOperators().get(0).getParentOperators()
+                  .get(pos);
+            }
+            ReduceSinkDesc rsconf = parent.getConf();
+            List<ExprNodeDesc> keys = rsconf.getKeyCols();
+            List<Long> dvs = Lists.newArrayList();
+
+            if (keys.size() > 1) {
+              multiAttr = true;
+            }
+
+            // get the table scan operator, i.e. the root of each parent, to get
+            // the column statistics corresponding to each table
+            List<TableScanOperator> roots = StatsUtils.getRoots(parent);
+
+            // for each of the join keys get the distinct counts and use the
+            // max of them for applying the rule
+            for (ExprNodeDesc end : keys) {
+              String joinCol = "";
+              if (end instanceof ExprNodeColumnDesc) {
+                ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
+                joinCol = encd.getColumn();
+              } else if (end instanceof ExprNodeGenericFuncDesc) {
+                ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
+                ExprNodeColumnDesc encd = (ExprNodeColumnDesc) engfd.getChildren().get(0);
+                joinCol = encd.getColumn();
+              }
+
+              for (TableScanOperator root : roots) {
+                Table rootTable = aspCtx.getParseContext().getTopToTable().get(root);
+                String rootDBName = rootTable.getDbName();
+                String rootTabName = rootTable.getTableName();
+
+                long dv = StatsUtils.getDistinctCountOfColumn(parentStats, rootDBName,
+                    rootTabName, joinCol);
+                dvs.add(dv);
+                prodRows = prodRows * parentStats.getNumRows();
+              }
+
+              long maxDV = Collections.max(dvs);
+
+              // preserve the max DV for each join key. this is useful for
+              // the multi-attribute join case
+              distinctVals.add(maxDV);
+            }
+          }
+
+          // compute the denominator for the single-key and multi-key join
+          // using the rule from the description above
+          long denom = 1;
+          if (multiAttr) {
+
+            // multiply the max distinct values of the multiple keys
+            for (Long dv : distinctVals) {
+              denom = denom * dv;
+            }
+          } else {
+            denom = Collections.max(distinctVals);
+          }
+
+          long newRowCount = prodRows / denom;
+          Statistics stats = parentStats.clone();
+          StatsUtils.updateStats(stats, newRowCount);
+          jop.setStatistics(stats);
+        } else {
+          // if basic stats are not complete then pass on the stats from the parent without
+          // applying any rule
+          if (parentStats != null) {
+            jop.setStatistics(parentStats.clone());
+          }
+        }
+      } catch (CloneNotSupportedException e) {
+        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+      }
+
+      return null;
+    }
+
+  }
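The general-case denominator above is easy to sanity-check standalone; the row counts and distinct values below are invented:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class JoinEstimateSketch {
  public static void main(String[] args) {
    long tR = 6L; // emp rows
    long tS = 4L; // dept rows

    // single join key: T(RXS) = T(R) * T(S) / max(V(R,deptid), V(S,deptid))
    List<Long> dvs = Arrays.asList(3L, 4L); // V(emp,deptid)=3, V(dept,deptid)=4
    long denom = Collections.max(dvs);
    System.out.println((tR * tS) / denom); // 6, close to the FK-join intuition T(RXS) = T(S)... here T(R)

    // with a second key y2 the per-key maxima multiply into the denominator
    long denomMulti = Collections.max(dvs) * Math.max(2L, 5L); // y2's V values also invented
    System.out.println((tR * tS) / denomMulti); // 24 / 20 = 1
  }
}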
+
+  public static class LimitStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      LimitOperator lop = (LimitOperator) nd;
+      Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats != null) {
+        long limit = -1;
+        try {
+          if (parentStats.getBasicStatsState().equals(Statistics.State.COMPLETE)) {
+            limit = lop.getConf().getLimit();
+            if (limit == -1) {
+              limit = lop.getConf().getLeastRows();
+            }
+
+            if (limit <= parentStats.getNumRows()) {
+              Statistics stats = parentStats.clone();
+              StatsUtils.updateStats(stats, limit);
+              lop.setStatistics(stats);
+            }
+          } else {
+            // if basic stats are not complete then pass on the stats from the parent without
+            // applying any rule
+            lop.setStatistics(parentStats.clone());
+          }
+        } catch (CloneNotSupportedException e) {
+          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        }
+      }
+      return null;
+    }
+
+  }
+
+  public static class DemuxStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      // the DEMUX operator duplicates the input rows once per child, so multiply
+      // the basic statistics by the number of children
+      DemuxOperator dop = (DemuxOperator) nd;
+      Operator<? extends OperatorDesc> parent = dop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats != null) {
+        int numChild = dop.getNumChild();
+        try {
+          if (parentStats.getBasicStatsState().equals(Statistics.State.COMPLETE)) {
+            Statistics stats = parentStats.clone();
+            stats.setNumRows(parentStats.getNumRows() * numChild);
+            stats.setDataSize(parentStats.getDataSize() * numChild);
+            dop.setStatistics(stats);
+          } else {
+            // if basic stats are not complete then pass on the stats from the parent without
+            // applying any rule
+            dop.setStatistics(parentStats.clone());
+          }
+        } catch (CloneNotSupportedException e) {
+          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        }
+      }
+      return null;
+    }
+
+  }
+
+  public static class DefaultStatsRule implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      Statistics stats = op.getConf().getStatistics();
+      if (stats == null) {
+        if (op.getParentOperators() != null) {
+
+          // if a parent's statistics are null then that branch of the tree has not been
+          // walked yet. don't update the stats until all branches are walked
+          if (isAllParentsContainStatistics(op)) {
+            stats = new Statistics();
+            for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
+              if (parent.getStatistics() != null) {
+                Statistics parentStats = parent.getStatistics();
+                stats.addToNumRows(parentStats.getNumRows());
+                stats.addToDataSize(parentStats.getDataSize());
+                stats.updateBasicStatsState(parentStats.getBasicStatsState());
+                stats.updateColumnStatsState(parentStats.getColumnStatsState());
+                stats.addToColumnStats(parentStats.getColumnStats());
+                op.getConf().setStatistics(stats);
+              }
+            }
+          }
+        }
+      }
+
+      return null;
+    }
+
+    // check if all parent statistics are available
+    private boolean isAllParentsContainStatistics(Operator<? extends OperatorDesc> op) {
+      for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
+        if (parent.getStatistics() == null) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+  }
+
+  public static NodeProcessor getTableScanRule() {
+    return new TableScanStatsRule();
+  }
+
+  public static NodeProcessor getSelectRule() {
+    return new SelectStatsRule();
+  }
+
+  public static NodeProcessor getFilterRule() {
+    return new FilterStatsRule();
+  }
+
+  public static NodeProcessor getGroupByRule() {
+    return new GroupByStatsRule();
+  }
+
+  public static NodeProcessor getJoinRule() {
+    return new JoinStatsRule();
+  }
+
+  public static NodeProcessor getDemuxRule() {
+    return new DemuxStatsRule();
+  }
+
+  public static NodeProcessor getLimitRule() {
+    return new LimitStatsRule();
+  }
+
+  public static NodeProcessor getPTFRule() {
+    return new PTFStatsRule();
+  }
+
+  public static NodeProcessor getDefaultRule() {
+    return new DefaultStatsRule();
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 46d1fac..a109eb2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -60,6 +60,7 @@
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
@@ -106,6 +107,7 @@
    * Lineage information for the query.
    */
   protected LineageInfo linfo;
+  protected Index lineageIndex;

   protected TableAccessInfo tableAccessInfo;
   protected ColumnAccessInfo columnAccessInfo;
@@ -862,6 +864,24 @@ public void setLineageInfo(LineageInfo linfo) {
   }

   /**
+   * Gets the lineage index.
+   *
+   * @return the lineage Index associated with the query.
+   */
+  public Index getLineageIndex() {
+    return lineageIndex;
+  }
+
+  /**
+   * Sets the lineage index.
+   *
+   * @param lindex the lineage Index structure that is set in the optimization phase
+   */
+  public void setLineageIndex(Index lindex) {
+    this.lineageIndex = lindex;
+  }
+
+  /**
    * Gets the table access information.
    *
    * @return TableAccessInfo associated with the query.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index b815ea2..0e3ca3f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -45,6 +45,7 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
@@ -101,6 +102,7 @@
    * The lineage information.
    */
   private LineageInfo lInfo;
+  private Index lineageIndex;

   private GlobalLimitCtx globalLimitCtx;
@@ -677,4 +679,12 @@ public ListSinkOperator getFetchSink() {
   public void setFetchSink(ListSinkOperator fetchSink) {
     this.fetchSink = fetchSink;
   }
+
+  public Index getLineageIndex() {
+    return lineageIndex;
+  }
+
+  public void setLineageIndex(Index lineageIndex) {
+    this.lineageIndex = lineageIndex;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5305537..5f9b66e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -310,6 +310,7 @@ public void initParseCtx(ParseContext pctx) {
     prunedPartitions = pctx.getPrunedPartitions();
     fetchTask = pctx.getFetchTask();
     setLineageInfo(pctx.getLineageInfo());
+    setLineageIndex(pctx.getLineageIndex());
   }

   public ParseContext getParseContext() {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
index c096a65..24694ef 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
@@ -21,6 +21,18 @@
 public class AbstractOperatorDesc implements OperatorDesc {

   private boolean vectorMode = false;
+  protected transient Statistics statistics;
+
+  @Override
+  @Explain(displayName = "Statistics", normalExplain = false)
+  public Statistics getStatistics() {
+    return statistics;
+  }
+
+  @Override
+  public void setStatistics(Statistics statistics) {
+    this.statistics = statistics;
+  }

   @Override
   public Object clone() throws CloneNotSupportedException {
diff --git
ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java new file mode 100644 index 0000000..3096afa --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + + +public class ColStatistics { + + private String fqColName; + private String colName; + private String colType; + private long countDistint; + private long numNulls; + private double avgColLen; + + public ColStatistics(String colName, String colType) { + this.setColumnName(colName); + this.setColumnType(colType); + this.setFullyQualifiedColName(colName); + } + + public ColStatistics() { + this(null, null); + } + + public String getColumnName() { + return colName; + } + + public void setColumnName(String colName) { + this.colName = colName; + } + + public String getColumnType() { + return colType; + } + + public void setColumnType(String colType) { + this.colType = colType; + } + + public long getCountDistint() { + return countDistint; + } + + public void setCountDistint(long countDistint) { + this.countDistint = countDistint; + } + + public long getNumNulls() { + return numNulls; + } + + public void setNumNulls(long numNulls) { + this.numNulls = numNulls; + } + + public double getAvgColLen() { + return avgColLen; + } + + public void setAvgColLen(double avgColLen) { + this.avgColLen = avgColLen; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" fqColName: "); + sb.append(fqColName); + sb.append(" colName: "); + sb.append(colName); + sb.append(" colType: "); + sb.append(colType); + sb.append(" countDistincts: "); + sb.append(countDistint); + sb.append(" numNulls: "); + sb.append(numNulls); + sb.append(" avgColLen: "); + sb.append(avgColLen); + return sb.toString(); + } + + @Override + protected ColStatistics clone() throws CloneNotSupportedException { + ColStatistics clone = new ColStatistics(colName, colType); + clone.setFullyQualifiedColName(fqColName); + clone.setAvgColLen(avgColLen); + clone.setCountDistint(countDistint); + clone.setNumNulls(numNulls); + return clone; + } + + public String getFullyQualifiedColName() { + return fqColName; + } + + public void setFullyQualifiedColName(String fqColName) { + this.fqColName = fqColName; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java index 36757e8..6c2efaf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java @@ -22,4 +22,6 @@ public interface OperatorDesc extends Serializable, Cloneable { public Object clone() 
throws CloneNotSupportedException; + public Statistics getStatistics(); + public void setStatistics(Statistics statistics); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java new file mode 100644 index 0000000..d051cdb --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * Statistics. Describes the output of an operator in terms of size, rows, etc + * based on estimates. + */ +@SuppressWarnings("serial") +public class Statistics implements Serializable { + + public enum State { + COMPLETE, PARTIAL, NONE + } + + private long numRows; + private long dataSize; + private State basicStatsState; + private Map columnStats; + private State columnStatsState; + + public Statistics() { + this(0, 0); + } + + public Statistics(long nr, long ds) { + this.numRows = nr; + this.dataSize = ds; + this.basicStatsState = State.NONE; + this.columnStats = null; + this.columnStatsState = State.NONE; + } + + public long getNumRows() { + return numRows; + } + + public void setNumRows(long numRows) { + this.numRows = numRows; + } + + public long getDataSize() { + return dataSize; + } + + public void setDataSize(long dataSize) { + this.dataSize = dataSize; + } + + public State getBasicStatsState() { + return basicStatsState; + } + + public void setBasicStatsState(State basicStatsState) { + this.basicStatsState = basicStatsState; + } + + public State getColumnStatsState() { + return columnStatsState; + } + + public void setColumnStatsState(State columnStatsState) { + this.columnStatsState = columnStatsState; + } + + @Override + @Explain(displayName = "") + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" numRows: "); + sb.append(numRows); + sb.append(" dataSize: "); + sb.append(dataSize); + sb.append(" basicStatsState: "); + sb.append(basicStatsState); + sb.append(" colStatsState: "); + sb.append(columnStatsState); + return sb.toString(); + } + + @Override + public Statistics clone() throws CloneNotSupportedException { + Statistics clone = new Statistics(numRows, dataSize); + clone.setBasicStatsState(basicStatsState); + clone.setColumnStatsState(columnStatsState); + if (columnStats != null) { + Map cloneColStats = Maps.newHashMap(); + for (Map.Entry entry : columnStats.entrySet()) { + cloneColStats.put(entry.getKey(), entry.getValue().clone()); + } + clone.setColumnStats(cloneColStats); + } + return clone; + } + + public void addToNumRows(long nr) { + 
numRows += nr; + } + + public void addToDataSize(long rds) { + dataSize += rds; + } + + public void setColumnStats(Map colStats) { + this.columnStats = colStats; + } + + public void setColumnStats(List colStats) { + columnStats = Maps.newHashMap(); + addToColumnStats(colStats); + } + + public void addToColumnStats(List colStats) { + + if (columnStats == null) { + columnStats = Maps.newHashMap(); + } + + if (colStats != null) { + for (ColStatistics cs : colStats) { + ColStatistics updatedCS = null; + if (cs != null) { + + String key = cs.getFullyQualifiedColName(); + // if column statistics for a column is already found then merge the statistics + if (columnStats.containsKey(key) && columnStats.get(key) != null) { + updatedCS = columnStats.get(key); + updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen())); + updatedCS.setNumNulls(updatedCS.getNumNulls() + cs.getNumNulls()); + updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); + columnStats.put(key, updatedCS); + } else { + columnStats.put(key, cs); + } + } + } + } + } + + // newState + // ----------------------------------------- + // basicStatsState | COMPLETE PARTIAL NONE | + // |_______________________________________| + // COMPLETE | COMPLETE PARTIAL PARTIAL | + // PARTIAL | PARTIAL PARTIAL PARTIAL | + // NONE | COMPLETE PARTIAL NONE | + // ----------------------------------------- + public void updateBasicStatsState(State newState) { + if (newState.equals(State.PARTIAL)) { + basicStatsState = State.PARTIAL; + } + + if (newState.equals(State.NONE)) { + if (basicStatsState.equals(State.NONE)) { + basicStatsState = State.NONE; + } else { + basicStatsState = State.PARTIAL; + } + } + + if (newState.equals(State.COMPLETE)) { + if (basicStatsState.equals(State.PARTIAL)) { + basicStatsState = State.PARTIAL; + } else { + basicStatsState = State.COMPLETE; + } + } + } + + // similar to the table above for basic stats + public void updateColumnStatsState(State newState) { + if (newState.equals(State.PARTIAL)) { + columnStatsState = State.PARTIAL; + } + + if (newState.equals(State.NONE)) { + if (columnStatsState.equals(State.NONE)) { + columnStatsState = State.NONE; + } else { + columnStatsState = State.PARTIAL; + } + } + + if (newState.equals(State.COMPLETE)) { + if (columnStatsState.equals(State.PARTIAL)) { + columnStatsState = State.PARTIAL; + } else { + columnStatsState = State.COMPLETE; + } + } + } + + public long getAvgRowSize() { + if (basicStatsState.equals(State.COMPLETE) && numRows != 0) { + return dataSize / numRows; + } + + return 0; + } + + public ColStatistics getColumnStatisticsForColumn(String colName) { + return columnStats.get(colName); + } + + public List getColumnStats() { + if(columnStats != null) { + return Lists.newArrayList(columnStats.values()); + } + return null; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java new file mode 100644 index 0000000..1a0b7a6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -0,0 +1,1005 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.stats; + +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyType; +import org.apache.hadoop.hive.ql.hooks.LineageInfo.TableAliasInfo; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index; +import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateStatsProcCtx; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.RowResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.ql.plan.Statistics.State; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.collect.Lists; + +public class StatsUtils { + + /** + * Collect table, partition and column level statistics + * @param conf + * @param partList + * @param table + * @param tableScanOperator + * @return Statistics + */ + public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, + Table table, TableScanOperator tableScanOperator) { + + Statistics stats = new Statistics(); + // column level statistics are required only for the columns that are needed + List neededColumns = tableScanOperator.getNeededColumns(); + + if (!table.isPartitioned()) { + + long nr = getTableStats(conf, table, StatsSetupConst.ROW_COUNT); + long rds = getTableStats(conf, table, StatsSetupConst.RAW_DATA_SIZE); + // if basic stats are not available then return + if (nr <= 0 && rds <= 0) { + stats.setBasicStatsState(Statistics.State.NONE); + return stats; + } + + // if either basic stat is missing, mark the stats as partial + if (nr <= 0 || rds <= 0) { + stats.setBasicStatsState(Statistics.State.PARTIAL); + } + + // if both are available then we have complete basic stats + if (nr > 0 && rds > 0) { + stats.setBasicStatsState(Statistics.State.COMPLETE); + } + stats.setNumRows(nr); + stats.setDataSize(rds); + + // get table level column statistics + List colStats = getTableColumnStats(table, neededColumns); + + // if column stats are available but at least one column doesn't have stats + // then mark it as partial + if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) { + stats.setColumnStatsState(Statistics.State.PARTIAL); + } + + // if column stats are available and all columns have stats then mark it + // as complete + if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) { + stats.setColumnStatsState(Statistics.State.COMPLETE); + } + + if (!checkIfColStatsAvailable(colStats)) { + if (!neededColumns.isEmpty()) { + stats.setColumnStatsState(Statistics.State.NONE); + } else { + stats.setColumnStatsState(Statistics.State.COMPLETE); + } + stats.addToColumnStats(null); + } else { + // set col stats and mark it as table level col stats + stats.addToColumnStats(colStats); + } + } else { + + // For partitioned tables, get the size of all the partitions after pruning + // the partitions that are not required + if (partList != null) { + for (Partition part : partList.getNotDeniedPartns()) { + long nr = getPartitionStats(conf, part, StatsSetupConst.ROW_COUNT); + long rds = getPartitionStats(conf, part, StatsSetupConst.RAW_DATA_SIZE); + // if both basic stats are not available then mark it as stats not available + if (nr <= 0 && rds <= 0) { + stats.updateBasicStatsState(Statistics.State.NONE); + } else if (nr <= 0 || rds <= 0) { + stats.updateBasicStatsState(Statistics.State.PARTIAL); + } else { + stats.updateBasicStatsState(Statistics.State.COMPLETE); + } + stats.addToNumRows(nr); + stats.addToDataSize(rds); + + // get partition level column statistics + List colStats = getPartitionColumnStats(table, part, neededColumns); + if (checkIfColStatsAvailable(colStats) && colStats.contains(null)) { + stats.updateColumnStatsState(Statistics.State.PARTIAL); + } else if (checkIfColStatsAvailable(colStats) && !colStats.contains(null)) { + stats.updateColumnStatsState(Statistics.State.COMPLETE); + } else { + stats.updateColumnStatsState(Statistics.State.NONE); + } + + stats.addToColumnStats(colStats); + } + } + } + + return stats; + }
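The three-way state classification above reads most clearly as a small pure function. The following is a sketch for illustration only (the helper and class names are hypothetical; the branching mirrors collectStatistics):

import org.apache.hadoop.hive.ql.plan.Statistics.State;

public class BasicStatsStateSketch {

  // mirrors the numRows/rawDataSize branching in collectStatistics
  static State classify(long numRows, long rawDataSize) {
    if (numRows <= 0 && rawDataSize <= 0) {
      return State.NONE;     // neither basic stat is available
    }
    if (numRows <= 0 || rawDataSize <= 0) {
      return State.PARTIAL;  // exactly one of the two is available
    }
    return State.COMPLETE;   // both are available
  }

  public static void main(String[] args) {
    System.out.println(classify(8, 796)); // COMPLETE
    System.out.println(classify(8, 0));   // PARTIAL
    System.out.println(classify(0, 0));   // NONE
  }
}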
+ /** + * Get the partition level column statistics from the metastore for all the needed columns + * @param table + * @param part + * @param neededColumns + * @return column statistics + */ + private static List getPartitionColumnStats(Table table, Partition part, + List neededColumns) { + + List colStatistics = Lists.newArrayList(); + for (String col : neededColumns) { + colStatistics.add(getParitionColumnStatsForColumn(table.getDbName(), table.getTableName(), + part.getName(), col)); + } + return colStatistics; + } + + public static ColStatistics getParitionColumnStatsForColumn(String dbName, String tabName, + String partName, String colName) { + try { + ColumnStatistics colStats = + Hive.get().getPartitionColumnStatistics(dbName, tabName, partName, colName); + if (colStats != null) { + for (ColumnStatisticsObj cso : colStats.getStatsObj()) { + return getColStat(cso, dbName, tabName, partName, colName); + } + } + } catch (HiveException e) { + return null; + } + return null; + } + + /** + * Returns true if column statistics are available for at least one column + * @param colStats + * @return + */ + private static boolean checkIfColStatsAvailable(List colStats) { + for (ColStatistics cs : colStats) { + if (cs != null) { + return true; + } + } + return false; + } + + public static ColStatistics getTableColumnStatsForColumn(String dbName, String tableName, + String colName) { + try { + ColumnStatistics colStat = Hive.get().getTableColumnStatistics(dbName, tableName, colName); + if (colStat != null) { + for (ColumnStatisticsObj cso : colStat.getStatsObj()) { + return getColStat(cso, dbName, tableName, null, colName); + } + } + } catch (HiveException e) { + return null; + } + return null; + } + + private static ColStatistics getColStat(ColumnStatisticsObj cso, String dbName, String tabName, + String partName, String colName) { + ColStatistics cs = new ColStatistics(); + cs.setColumnName(cso.getColName()); + cs.setColumnType(cso.getColType()); + cs.setFullyQualifiedColName(Utilities.getFullyQualifiedColumnName(dbName, tabName, partName, + colName)); + String colType = cso.getColType(); + ColumnStatisticsData csd = cso.getStatsData(); + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME)) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if
(colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + cs.setCountDistint(csd.getLongStats().getNumDVs()); + cs.setNumNulls(csd.getLongStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + cs.setCountDistint(csd.getDoubleStats().getNumDVs()); + cs.setNumNulls(csd.getDoubleStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive2()); + } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + cs.setCountDistint(csd.getStringStats().getNumDVs()); + cs.setNumNulls(csd.getStringStats().getNumNulls()); + cs.setAvgColLen(csd.getStringStats().getAvgColLen()); + } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + if (csd.getBooleanStats().getNumFalses() > 0 && csd.getBooleanStats().getNumTrues() > 0) { + cs.setCountDistint(2); + } else { + cs.setCountDistint(1); + } + cs.setNumNulls(csd.getBooleanStats().getNumNulls()); + cs.setAvgColLen(JavaDataModel.get().primitive1()); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + cs.setAvgColLen(csd.getBinaryStats().getAvgColLen()); + cs.setNumNulls(csd.getBinaryStats().getNumNulls()); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + } else if (colType.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); + } else { + // Columns statistics for complex datatypes are not supported yet + return null; + } + return cs; + } + + /** + * Get table level column statistics from metastore for needed columns + * @param table + * @param neededColumns + * @return column statistics + */ + private static List getTableColumnStats(Table table, List neededColumns) { + + List colStatistics = Lists.newArrayList(); + for (String col : neededColumns) { + colStatistics.add(getTableColumnStatsForColumn(table.getDbName(), table.getTableName(), col)); + } + return colStatistics; + } + + /** + * Get partition level basic statistics + * @param conf + * @param part + * @param type + * @return + */ + private static long getPartitionStats(HiveConf conf, Partition part, String type) { + Path path = part.getPartitionPath(); + + if (StatsSetupConst.ROW_COUNT.equals(type)) { + return getNumRows(part.getParameters().get(type), path); + } else if (StatsSetupConst.RAW_DATA_SIZE.equals(type)) { + return getDataSize(conf, part, path); + } + + return 0; + } + + /** + * Get table level basic statistics + * @param conf + * @param table + * @param type + * @return + */ + private static long getTableStats(HiveConf conf, Table table, String type) { + Path path = table.getPath(); + + if (StatsSetupConst.ROW_COUNT.equals(type)) { + return getNumRows(table.getProperty(type), path); + } else if (StatsSetupConst.RAW_DATA_SIZE.equals(type)) { + return getDataSize(conf, table, path); + } + + return 0; + } + + /** + * Get raw data size (uncompressed size) from table/part params. If raw data size is not available + * then get total file size from table/part params. 
If total file size is also not available then + * get content summary of the file and read the file length (Calls FileSystem). + * @param conf + * @param object + * @param path + * @return raw data size + */ + private static long getDataSize(HiveConf conf, Object object, Path path) { + long size = 0; + Table table = null; + Partition part = null; + String rds = null; + String ts = null; + if (object instanceof Table) { + table = (Table) object; + rds = table.getProperty(StatsSetupConst.RAW_DATA_SIZE); + ts = table.getProperty(StatsSetupConst.TOTAL_SIZE); + } + + if (object instanceof Partition) { + part = (Partition) object; + rds = part.getParameters().get(StatsSetupConst.RAW_DATA_SIZE); + ts = part.getParameters().get(StatsSetupConst.TOTAL_SIZE); + } + + if (rds != null) { + try { + size = Long.valueOf(rds); + } catch (NumberFormatException e) { + size = 0; + } + } + + // check for total file size + if (size == 0 && ts != null) { + try { + size = Long.valueOf(ts); + } catch (NumberFormatException e) { + size = 0; + } + } + + // make file system call to get file length + if (size == 0) { + try { + FileSystem fs = path.getFileSystem(conf); + size = fs.getContentSummary(path).getLength(); + } catch (Exception e) { + size = 0; + } + } + return size; + } + + private static long getNumRows(String nr, Path path) { + // If the size is present in the metastore, use it + if (nr != null) { + try { + return Long.valueOf(nr); + } catch (NumberFormatException e) { + return 0; + } + } + + return 0; + } + + /** + * Get the raw data size of variable length data types + * @param oi + * @param colType + * @return raw data size + */ + public static long getSizeOfVariableLengthTypes(ObjectInspector oi, String colType) { + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + + // constant string projection Ex: select "hello" from table + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + if (coi.getWritableConstantValue() == null) { + return 0; + } + return JavaDataModel.get().lengthForStringOfLength( + coi.getWritableConstantValue().toString().length()); + } else if (oi instanceof WritableConstantStringObjectInspector) { + // some UDFs return writable constant strings (fixed width) + // Ex: select upper("hello") from table + WritableConstantStringObjectInspector wcsoi = (WritableConstantStringObjectInspector) oi; + return JavaDataModel.get().lengthForStringOfLength( + wcsoi.getWritableConstantValue().toString().length()); + } else if (oi instanceof WritableStringObjectInspector) { + // some UDFs may emit strings of variable length. like pattern matching + // UDFs. it's hard to find the length of such UDFs. + return 0; + } + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + // constant byte arrays + if (oi instanceof ConstantObjectInspector) { + ConstantObjectInspector coi = (ConstantObjectInspector) oi; + if (coi.getWritableConstantValue() == null) { + return 0; + } + BytesWritable bw = ((BytesWritable) coi.getWritableConstantValue()); + return JavaDataModel.get().lengthForByteArrayOfSize(bw.getLength()); + } else if (oi instanceof WritableConstantBinaryObjectInspector) { + // writable constant byte arrays + WritableConstantBinaryObjectInspector wcboi = (WritableConstantBinaryObjectInspector) oi; + return JavaDataModel.get().lengthForByteArrayOfSize( + wcboi.getWritableConstantValue().getLength()); + } else if (oi instanceof WritableBinaryObjectInspector) { + // variable byte arrays. 
it's hard to find the length + return 0; + } + } else { + // complex types (map, list, struct, union) + return getSizeOfComplexTypes(oi); + } + + return 0; + } + + /** + * get the size of complex data types + * @param oi + * @return raw data size + */ + private static long getSizeOfComplexTypes(ObjectInspector oi) { + long result = 0; + int length = 0; + switch (oi.getCategory()) { + case PRIMITIVE: + String colType = oi.getTypeName(); + if (colType.equalsIgnoreCase("string") || colType.equalsIgnoreCase("binary")) { + result += getSizeOfVariableLengthTypes(oi, colType); + } else { + result += getSizeOfFixedLengthPrimitivesFromType(colType); + } + break; + case LIST: + if (oi instanceof StandardConstantListObjectInspector) { + // constant list projection of known length + StandardConstantListObjectInspector scloi = (StandardConstantListObjectInspector) oi; + length = scloi.getWritableConstantValue().size(); + // check if list elements are primitive or Objects + ObjectInspector leoi = scloi.getListElementObjectInspector(); + if (leoi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)) { + result += getSizeOfPrimitiveTypeArraysFromType(leoi.getTypeName(), length); + } else { + result += JavaDataModel.get().lengthForObjectArrayOfSize(length); + } + } else { + StandardListObjectInspector sloi = (StandardListObjectInspector) oi; + result += getSizeOfComplexTypes(sloi.getListElementObjectInspector()); + } + break; + case MAP: + if (oi instanceof StandardConstantMapObjectInspector) { + // constant map projection of known length + StandardConstantMapObjectInspector scmoi = (StandardConstantMapObjectInspector) oi; + result += getSizeOfMap(scmoi); + } else { + StandardMapObjectInspector smoi = (StandardMapObjectInspector) oi; + result += getSizeOfComplexTypes(smoi.getMapKeyObjectInspector()); + result += getSizeOfComplexTypes(smoi.getMapValueObjectInspector()); + } + break; + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + // add constant object overhead for struct + result += JavaDataModel.get().object(); + // add constant struct field names references overhead + result += soi.getAllStructFieldRefs().size() * JavaDataModel.get().ref(); + for (StructField field : soi.getAllStructFieldRefs()) { + result += getSizeOfComplexTypes(field.getFieldObjectInspector()); + } + break; + case UNION: + UnionObjectInspector uoi = (UnionObjectInspector) oi; + // add constant object overhead for union + result += JavaDataModel.get().object(); + // add constant size for unions tags + result += uoi.getObjectInspectors().size() * JavaDataModel.get().primitive1(); + for (ObjectInspector foi : uoi.getObjectInspectors()) { + result += getSizeOfComplexTypes(foi); + } + break; + default: + break; + } + return result; + } + + /** + * get size of fixed length primitives + * @param colType + * @return raw data size + */ + public static long getSizeOfFixedLengthPrimitivesFromType(String colType) { + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + return JavaDataModel.get().primitive1(); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + return JavaDataModel.get().primitive2(); + } else if 
(colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + return JavaDataModel.get().lengthOfTimestamp(); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + return JavaDataModel.get().lengthOfDate(); + } else if (colType.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) { + return JavaDataModel.get().lengthOfDecimal(); + } else { + return 0; + } + } + + /** + * get the size of arrays of primitive types + * @param colType + * @param length + * @return raw data size + */ + public static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length) { + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { + return JavaDataModel.get().lengthForIntArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + return JavaDataModel.get().lengthForDoubleArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + return JavaDataModel.get().lengthForLongArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + return JavaDataModel.get().lengthForByteArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { + return JavaDataModel.get().lengthForBooleanArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + return JavaDataModel.get().lengthForTimestampArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + return JavaDataModel.get().lengthForDateArrayOfSize(length); + } else if (colType.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) { + return JavaDataModel.get().lengthForDecimalArrayOfSize(length); + } else { + return 0; + } + } + + /** + * Estimate the size of map object + * @param scmoi + * @return size of map + */ + private static long getSizeOfMap(StandardConstantMapObjectInspector scmoi) { + Map map = scmoi.getWritableConstantValue(); + ObjectInspector koi = scmoi.getMapKeyObjectInspector(); + ObjectInspector voi = scmoi.getMapValueObjectInspector(); + long result = 0; + for (Map.Entry entry : map.entrySet()) { + result += getWritableSize(koi, entry.getKey()); + result += getWritableSize(voi, entry.getValue()); + } + + // add additional overhead of each map entries + result += JavaDataModel.get().hashMap(map.entrySet().size()); + return result; + } + + /** + * get size of primitive data types based on their respective writable object inspector + * @param oi + * @param value + * @return raw data size + */ + public static long getWritableSize(ObjectInspector oi, Object value) { + if (oi instanceof WritableStringObjectInspector) { + WritableStringObjectInspector woi = (WritableStringObjectInspector) oi; + return JavaDataModel.get().lengthForStringOfLength( + woi.getPrimitiveWritableObject(value).getLength()); + } else if (oi instanceof WritableBinaryObjectInspector) { + WritableBinaryObjectInspector woi = (WritableBinaryObjectInspector) oi; + return JavaDataModel.get().lengthForByteArrayOfSize( + woi.getPrimitiveWritableObject(value).getLength()); + } else if (oi instanceof WritableBooleanObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableByteObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableDateObjectInspector) { + return 
JavaDataModel.get().lengthOfDate(); + } else if (oi instanceof WritableDoubleObjectInspector) { + return JavaDataModel.get().primitive2(); + } else if (oi instanceof WritableFloatObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableHiveDecimalObjectInspector) { + return JavaDataModel.get().lengthOfDecimal(); + } else if (oi instanceof WritableIntObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableLongObjectInspector) { + return JavaDataModel.get().primitive2(); + } else if (oi instanceof WritableShortObjectInspector) { + return JavaDataModel.get().primitive1(); + } else if (oi instanceof WritableTimestampObjectInspector) { + return JavaDataModel.get().lengthOfTimestamp(); + } + + return 0; + } + + /** + * Update the basic statistics of the statistics object based on the new row count + * @param stats + * @param newNumRows + */ + public static void updateStats(Statistics stats, long newNumRows) { + long avgRowSize = stats.getAvgRowSize(); + stats.setNumRows(newNumRows); + stats.setDataSize(newNumRows * avgRowSize); + } + + /** + * For a given column return the distinct cardinality + * @param stats + * @param dbName + * @param tabName + * @param colName + * @return + */ + public static long getDistinctCountOfColumn(Statistics stats, String dbName, String tabName, + String colName) { + if (stats != null && !stats.getColumnStatsState().equals(State.NONE)) { + ColStatistics cs = + stats.getColumnStatisticsForColumn(Utilities.getFullyQualifiedColumnName(dbName, tabName, + colName)); + if (cs != null) { + if (cs.getNumNulls() > 0) { + // consider NULL as another distinct value + return 1 + cs.getCountDistint(); + } + return cs.getCountDistint(); + } + } + return 0; + } + + /** + * Get roots of a given operator + * @param curOp + * @return + */ + public static List getRoots(Operator curOp) { + List result = Lists.newArrayList(); + getRootsImpl(curOp, result); + return result; + } + + private static void getRootsImpl(Operator curOp, + List result) { + + for (Operator parent : curOp.getParentOperators()) { + if (parent instanceof TableScanOperator) { + TableScanOperator tsop = (TableScanOperator) parent; + result.add(tsop); + } else { + getRootsImpl(parent, result); + } + } + }
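As a worked example of updateStats, here is how the 8-row/796-byte loc_orc baseline used by the tests below scales down to a LIMIT 4 estimate. This is a sketch only; it assumes just the Statistics and StatsUtils APIs from this patch.

import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.stats.StatsUtils;

public class UpdateStatsSketch {
  public static void main(String[] args) {
    Statistics stats = new Statistics(8, 796);
    // getAvgRowSize() only computes when basic stats are COMPLETE
    stats.setBasicStatsState(Statistics.State.COMPLETE);

    // what a LIMIT 4 estimate does: keep avgRowSize (796 / 8 = 99), rescale rows
    StatsUtils.updateStats(stats, 4);

    // prints: numRows: 4 dataSize: 396 (i.e. 4 * 99), matching annotate_stats_limit.q
    System.out.println(stats);
  }
}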
+ + public static ColStatistics getColStatsFromExpression(String dbName, String tabName, + String partName, ExprNodeDesc end) { + String colName = null; + String colType = null; + long numRows = getNumRows(dbName, tabName); + double avgColSize = 0; + long countDistincts = 0; + long numNulls = 0; + ObjectInspector oi = null; + + if (end instanceof ExprNodeColumnDesc) { + + // column projection + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + colName = encd.getColumn(); + + if (encd.getIsPartitionColOrVirtualCol()) { + // virtual columns + colType = encd.getTypeInfo().getTypeName(); + countDistincts = numRows; + oi = encd.getWritableObjectInspector(); + } else { + if (partName != null) { + // partitioned table + return getParitionColumnStatsForColumn(dbName, tabName, partName, colName); + } else { + // unpartitioned table + return getTableColumnStatsForColumn(dbName, tabName, colName); + } + } + } else if (end instanceof ExprNodeConstantDesc) { + + // constant projection + ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; + // null projection + if (encd.getValue() == null) { + colName = encd.getName(); + colType = "null"; + numNulls = numRows; + } else { + colName = encd.getName(); + colType = encd.getTypeString(); + countDistincts = 1; + oi = encd.getWritableObjectInspector(); + } + } else if (end instanceof ExprNodeGenericFuncDesc) { + + // udf projection + ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; + colName = engfd.getName(); + colType = engfd.getTypeString(); + countDistincts = numRows; + oi = engfd.getWritableObjectInspector(); + } else if (end instanceof ExprNodeNullDesc) { + + // null projection + ExprNodeNullDesc ennd = (ExprNodeNullDesc) end; + colName = ennd.getName(); + colType = "null"; + numNulls = numRows; + } else { + return null; + } + + if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME) + || colType.startsWith(serdeConstants.LIST_TYPE_NAME) + || colType.startsWith(serdeConstants.MAP_TYPE_NAME) + || colType.startsWith(serdeConstants.STRUCT_TYPE_NAME) + || colType.startsWith(serdeConstants.UNION_TYPE_NAME)) { + avgColSize = StatsUtils.getSizeOfVariableLengthTypes(oi, colType); + } else { + avgColSize = StatsUtils.getSizeOfFixedLengthPrimitivesFromType(colType); + } + + ColStatistics colStats = new ColStatistics(colName, colType); + colStats.setFullyQualifiedColName(Utilities.getFullyQualifiedColumnName(dbName, tabName, + colName)); + colStats.setAvgColLen(avgColSize); + colStats.setCountDistint(countDistincts); + colStats.setNumNulls(numNulls); + + return colStats; + } + + public static long getNumRows(String dbName, String tabName) { + return getBasicStatForTable(dbName, tabName, StatsSetupConst.ROW_COUNT); + } + + public static long getBasicStatForTable(String dbName, String tabName, String statType) { + Table table; + try { + table = Hive.get().getTable(dbName, tabName); + } catch (HiveException e) { + return 0; + } + + Map params = table.getParameters(); + long result = 0; + + if (params != null) { + String value = null; + if (statType.equals(StatsSetupConst.ROW_COUNT)) { + value = params.get(StatsSetupConst.ROW_COUNT); + } else if (statType.equals(StatsSetupConst.RAW_DATA_SIZE)) { + value = params.get(StatsSetupConst.RAW_DATA_SIZE); + } else if (statType.equals(StatsSetupConst.TOTAL_SIZE)) { + value = params.get(StatsSetupConst.TOTAL_SIZE); + } + // the requested stat may be absent or malformed in the params map; + // guard the parse instead of propagating a NumberFormatException + if (value != null) { + try { + result = Long.parseLong(value); + } catch (NumberFormatException e) { + result = 0; + } + } + } + return result; + } + + public static long getDataSizeFromColumnStats(long numRows, List colStats) { + long result = 0; + + for (ColStatistics cs : colStats) { + if (cs != null) { + String colType = cs.getColumnType(); + long nonNullCount = numRows - cs.getNumNulls(); + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + result += nonNullCount * cs.getAvgColLen(); + } else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)) { + result += + nonNullCount + * JavaDataModel.get() + .lengthForStringOfLength((int) Math.round(cs.getAvgColLen())); + } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { + result += + nonNullCount + * JavaDataModel.get().lengthForByteArrayOfSize( + (int) Math.round(cs.getAvgColLen())); + } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { + result += nonNullCount * JavaDataModel.get().lengthOfTimestamp(); + } else if
(colType.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) { + result += nonNullCount * JavaDataModel.get().lengthOfDecimal(); + } else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) { + result += nonNullCount * JavaDataModel.get().lengthOfDate(); + } else { + result += nonNullCount * cs.getAvgColLen(); + } + } + } + + return result; + } + + public static Collection fetchColumnStatistics(String dbName, + String tableName, Map partList, ExprNodeDesc end) { + // get column statistics for partitioned or non-partitioned tables + List colStats = Lists.newArrayList(); + if (partList != null) { + for (PrunedPartitionList ppl : partList.values()) { + for (Partition part : ppl.getNotDeniedPartns()) { + String partName = part.getName(); + colStats.add(getColStatsFromExpression(dbName, tableName, partName, end)); + } + } + } else { + colStats.add(getColStatsFromExpression(dbName, tableName, null, end)); + } + return colStats; + } + + public static Statistics applySelectRule(String dbName, String tableName, + Operator pop, AnnotateStatsProcCtx aspCtx, + Map partList, Statistics parentStats, List inCols, + List outCols, Index lIndex) throws SemanticException { + + try { + + // TODO: handle the case SELECT *,UDF() FROM TABLE. SELECT operator doesn't provide + // enough information about this case. + + // both basic statistics and column statistics are required for the statistics to be + // reliable + if (parentStats.getBasicStatsState().equals(Statistics.State.COMPLETE) + && parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE) + && inCols.size() == outCols.size()) { + + List colStats = Lists.newArrayList(); + for (int i = 0; i < inCols.size(); i++) { + ExprNodeDesc end = inCols.get(i); + if (end instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; + String colName = null; + String tabAlias = null; + RowResolver rr = aspCtx.getParseContext().getOpParseCtx().get(pop).getRowResolver(); + + // internal name. we need to use inverse row resolver to get the actual name + if (encd.getColumn().startsWith("_")) { + + // the value field of inverse resolve map is String[] with 2 values. 1st value is + // table alias and 2nd value is column alias + String[] aliases = rr.getInvRslvMap().get(encd.getColumn()); + + // if the table and column aliases still could not be found, then try with output + // column name + if (aliases == null) { + aliases = rr.getInvRslvMap().get(outCols.get(i)); + tabAlias = aliases[0]; + colName = aliases[1]; + } else { + tabAlias = aliases[0]; + colName = aliases[1]; + } + } else { + colName = encd.getColumn(); + } + + // get the dependency info for the column name + ColumnInfo ci = rr.get(tabAlias, colName); + Dependency dep = lIndex.getDependency(pop, ci); + String actualColName = null; + + // if dependency is not available then column name is not internal name + if (dep == null) { + colStats.addAll(StatsUtils.fetchColumnStatistics(dbName, tableName, partList, end)); + } else { + BaseColumnInfo bci = dep.getBaseCols().get(0); + TableAliasInfo tai = bci.getTabAlias(); + + // might be count(*) or similar case + if (bci.getColumn() == null) { + return parentStats.clone(); + } + + // SIMPLE dependency will have only one direct base column, so we can reliably + // take first base column information. 
Else there will be multiple columns + // in which case we will fall back and use the column name + if (!dep.getType().equals(DependencyType.SIMPLE)) { + actualColName = colName; + } else { + actualColName = bci.getColumn().getName(); + } + dbName = tai.getTable().getDbName(); + tableName = tai.getTable().getTableName(); + + ExprNodeColumnDesc cloneEncd = (ExprNodeColumnDesc) encd.clone(); + cloneEncd.setColumn(actualColName); + + colStats.addAll(StatsUtils.fetchColumnStatistics(dbName, tableName, partList, + cloneEncd)); + } + } else { + + // column name is not the internal name + colStats.addAll(StatsUtils.fetchColumnStatistics(dbName, tableName, partList, end)); + } + } + + // compute new statistics from column statistics + Statistics st = parentStats.clone(); + st.setColumnStats(colStats); + st.setDataSize(StatsUtils.getDataSizeFromColumnStats(st.getNumRows(), colStats)); + return st; + + } else { + // if basic stats are not complete then pass on the stats from its parent without applying + // any rule + if (parentStats != null) { + return parentStats.clone(); + } + } + } catch (CloneNotSupportedException e) { + throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); + } + return null; + } +}
diff --git ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java index 9c3c4c0..3352a08 100644 --- ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java +++ ql/src/java/org/apache/hadoop/hive/ql/util/JavaDataModel.java @@ -267,6 +267,15 @@ public int lengthForIntArrayOfSize(int length) { public int lengthForBooleanArrayOfSize(int length) { return lengthForPrimitiveArrayOfSize(PRIMITIVE_BYTE, length); } + public int lengthForTimestampArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfTimestamp(), length); + } + public int lengthForDateArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDate(), length); + } + public int lengthForDecimalArrayOfSize(int length) { + return lengthForPrimitiveArrayOfSize(lengthOfDecimal(), length); + } public int lengthOfDecimal() { // object overhead + 8 bytes for intCompact + 4 bytes for precision
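The three new array helpers extend the existing lengthForPrimitiveArrayOfSize pattern to timestamp, date and decimal elements. Below is a quick sketch of how they might be called when sizing a constant array projection (the harness class is hypothetical; the JavaDataModel calls are the ones added above):

import org.apache.hadoop.hive.ql.util.JavaDataModel;

public class ArraySizeSketch {
  public static void main(String[] args) {
    JavaDataModel model = JavaDataModel.get();
    // estimated in-memory footprint of 10-element constant arrays, e.g. when
    // getSizeOfPrimitiveTypeArraysFromType sizes an array<timestamp> projection
    System.out.println(model.lengthForTimestampArrayOfSize(10));
    System.out.println(model.lengthForDateArrayOfSize(10));
    System.out.println(model.lengthForDecimalArrayOfSize(10));
  }
}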
diff --git ql/src/test/queries/clientpositive/annotate_stats_filter.q ql/src/test/queries/clientpositive/annotate_stats_filter.q new file mode 100644 index 0000000..5ccc4c5 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_filter.q @@ -0,0 +1,80 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH'; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- state column has 5 distinct values; the estimate is numRows/countDistincts +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where state='OH'; + +-- a not-equals comparison shouldn't affect the number of rows. rawDataSize is 792 and not 796 because avgColLen is rounded to an integer rather than kept as a double. +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where state!='OH'; +explain extended select * from loc_orc where state<>'OH'; + +-- null checks are treated like a constant equality comparison +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where zip is null; +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where !(zip is not null); + +-- not-null checks are treated as the inverse of null checks +-- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where zip is not null; +-- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where !(zip is null); + +-- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where !false; +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true; + +-- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where state='OH' or state='CA'; + +-- AND evaluation. rules are applied in cascade: 8/2 = 4, then 4/2 = 2 +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where year=2001 and year is null; +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL'; + +-- AND and OR together. left expr will yield 1 row and right will yield 1 row +-- numRows: 3 rawDataSize: 297 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA'); + +-- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA'); + +-- all inequality conditions use the rows/3 rule +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where locid < 30; +explain extended select * from loc_orc where locid > 30; +explain extended select * from loc_orc where locid <= 30; +explain extended select * from loc_orc where locid >= 30; + +-- will invalidate session level column stats cache +drop table loc_staging; +drop table loc_orc;
diff --git ql/src/test/queries/clientpositive/annotate_stats_groupby.q ql/src/test/queries/clientpositive/annotate_stats_groupby.q new file mode 100644 index 0000000..123e2e4 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_groupby.q @@ -0,0 +1,49 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- only one distinct value in year column + 1 NULL value +-- map-side and reduce-side GBY : numRows: 2 +explain extended select year from loc_orc group by year; + +-- map-side and reduce-side GBY : numRows: 4 +explain extended select state,locid from loc_orc group by state,locid; + +-- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid with cube; + +-- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid with
rollup; + +-- map-side GBY numRows: 4 reduce-side GBY numRows: 2 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)); + +-- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)); + +-- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()); + +-- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()); + +-- will invalidate session level column stats cache +drop table loc_staging; +drop table loc_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_join.q ql/src/test/queries/clientpositive/annotate_stats_join.q new file mode 100644 index 0000000..6c244e0 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_join.q @@ -0,0 +1,44 @@ +set hive.annotate.stats=true; + +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists dept_staging ( + deptname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +create table if not exists dept_orc like dept_staging; +alter table dept_orc set fileformat orc; + +LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; +LOAD DATA LOCAL INPATH '../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging; + +insert overwrite table emp_orc select * from emp_staging; +insert overwrite table dept_orc select * from dept_staging; + +analyze table emp_orc compute statistics for columns lastname,deptid; + +-- no statistics will be displayed for this case as column statistics for table dept_orc is not available yet +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); + +analyze table dept_orc compute statistics for columns deptname,deptid; + +-- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); + +set hive.auto.convert.join=false; + +-- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. 
Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid); + +-- will invalidate session level column stats cache +drop table emp_staging; +drop table dept_staging; +drop table emp_orc; +drop table dept_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_limit.q ql/src/test/queries/clientpositive/annotate_stats_limit.q new file mode 100644 index 0000000..53f4e83 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_limit.q @@ -0,0 +1,29 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4; + +-- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16; + +-- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0; + diff --git ql/src/test/queries/clientpositive/annotate_stats_part.q ql/src/test/queries/clientpositive/annotate_stats_part.q new file mode 100644 index 0000000..0eb6e40 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_part.q @@ -0,0 +1,72 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; + +create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc; + +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table loc_orc partition(year) select * from loc_staging; + +-- stats are disabled. basic stats will report the file size but not raw data size. 
 diff --git ql/src/test/queries/clientpositive/annotate_stats_part.q ql/src/test/queries/clientpositive/annotate_stats_part.q new file mode 100644 index 0000000..0eb6e40 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_part.q @@ -0,0 +1,75 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging; + +create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc; + +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table loc_orc partition(year) select * from loc_staging; + +-- stats are disabled; basic stats will report the file size but not the raw data size, so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc; + +-- partition level analyze statistics for a specific partition +analyze table loc_orc partition(year=2001) compute statistics; +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc; +-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001; + +-- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics; +-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__'; +-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc; +-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__'; +-- both partitions will be pruned +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__'; + +-- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid; +-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select zip from loc_orc; +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state from loc_orc; + +-- column statistics for __HIVE_DEFAULT_PARTITION__ are not supported yet, hence colStatState reports PARTIAL
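 + +-- the table-level colStatState appears to combine the per-partition states (an assumption drawn from the expectations in this file): +-- COMPLETE for year=2001 merged with NONE for __HIVE_DEFAULT_PARTITION__ gives PARTIAL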
 +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state,locid from loc_orc; +-- basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001; +-- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001; +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select * from loc_orc; + +-- will invalidate session level column stats cache +drop table loc_staging; +drop table loc_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_ptf.q ql/src/test/queries/clientpositive/annotate_stats_ptf.q new file mode 100644 index 0000000..0ab2c71 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_ptf.q @@ -0,0 +1,36 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid) from loc_orc; + +-- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid,zip) from loc_orc; + +-- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid) from loc_orc; + +-- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between unbounded preceding and current row) from loc_orc; + +-- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between 3 preceding and current row) from loc_orc; + +-- will invalidate session level column stats cache +drop table loc_staging; +drop table loc_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_select.q ql/src/test/queries/clientpositive/annotate_stats_select.q new file mode 100644 index 0000000..988ba5f --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_select.q @@ -0,0 +1,113 @@ +set hive.annotate.stats=true; + +create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map<string,string>, + l1 array<int>, + st1 struct<c1:int,c2:string> +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile; + +create table alltypes_orc like alltypes; +alter table alltypes_orc set fileformat orc; + +load data local inpath '../data/files/alltypes.txt' overwrite into table alltypes; + +insert overwrite table alltypes_orc select * from alltypes; + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc; + +-- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1; + +-- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc;
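 + +-- the size above is consistent with 4-byte primitive columns (an assumption read off the numbers in this file): +-- 2 rows * 4 bytes (boolean) = rawDataSize 8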
 + +-- column alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 172 +explain extended select s1 from alltypes_orc; + +-- column statistics for complex types are unsupported, so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 244 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1, s1 from alltypes_orc; + +-- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc; + +-- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc; + +-- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc; + +-- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc; + +-- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc; + +-- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc; + +-- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc; + +-- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc; + +-- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc; + +-- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc; + +-- COUNT(*) is projected as a new column; it is not projected as a GenericUDF, so the data size estimate is based on the number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(*) from alltypes_orc; + +-- COUNT(1) is projected as a new column; it is not projected as a GenericUDF, so the data size estimate is based on the number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(1) from alltypes_orc; + +-- numRows: 2 rawDataSize: 1522 +explain extended select *,11 from alltypes_orc; + +-- will invalidate session level column stats cache +drop table alltypes; +drop table alltypes_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_table.q ql/src/test/queries/clientpositive/annotate_stats_table.q new file mode 100644 index 0000000..4b63cd1 --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_table.q @@ -0,0 +1,51 @@ +set hive.annotate.stats=true; + +create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile; + +create table if not exists emp_orc like emp_staging; +alter table emp_orc set fileformat orc; + +-- basicStatState: NONE level: TABLE colStatState: NONE +explain extended select * from emp_orc; + +LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging; + +set hive.stats.autogather=false; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +insert overwrite table emp_orc select * from emp_staging; + +-- stats are disabled; basic stats will report the file size but not the raw data size, so initial statistics will be PARTIAL
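 + +-- expected progression in this file: NONE before any data is loaded, PARTIAL once data is inserted with autogather disabled, +-- and COMPLETE after the analyze commands below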
 + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE +explain extended select * from emp_orc; + +-- table level analyze statistics +analyze table emp_orc compute statistics; + +-- basicStatState: COMPLETE level: TABLE colStatState: NONE +explain extended select * from emp_orc; + +-- column level partial statistics +analyze table emp_orc compute statistics for columns deptid; +-- basicStatState: COMPLETE level: TABLE colStatState: PARTIAL +explain extended select * from emp_orc; +-- all selected columns have statistics +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select deptid from emp_orc; + +-- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid; +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select * from emp_orc; + +-- will invalidate session level column stats cache +drop table emp_staging; +drop table emp_orc; diff --git ql/src/test/queries/clientpositive/annotate_stats_union.q ql/src/test/queries/clientpositive/annotate_stats_union.q new file mode 100644 index 0000000..bfeae9d --- /dev/null +++ ql/src/test/queries/clientpositive/annotate_stats_union.q @@ -0,0 +1,65 @@ +set hive.annotate.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- numRows: 8 rawDataSize: 680 +explain extended select state from loc_orc; + +-- numRows: 16 rawDataSize: 1360 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp; + +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc; + +-- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp; + +create database test; +use test; +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_staging compute statistics for columns state,locid,zip,year; +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- there should be 2 entries for DB statistics. Since there are 2 different DBs, statistics can't be merged +explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp; + +-- there should be 2 entries for Table statistics. Since there are 2 different tables, statistics can't be merged +explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
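 + +-- recap of the arithmetic above: union all adds the statistics of its branches, +-- e.g. numRows 8 + 8 = 16 and rawDataSize 680 + 680 = 1360 (or 796 + 796 = 1592)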
 + +-- will invalidate session level column stats cache +drop table loc_staging; +drop table loc_orc; +drop database test; +use default; +drop table loc_staging; +drop table loc_orc; diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out new file mode 100644 index 0000000..cb9cfcd --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -0,0 +1,2496 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT
(TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- column stats are not COMPLETE, so stats are not updated +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns 
state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- state column has 5 distincts. numRows/countDistincts +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where state='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- state column has 5 distincts. 
numRows/countDistincts +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where state='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state = 'OH') + type: boolean + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { 
string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not equals comparison shouldn't affect number of rows. rawDataSize is 792 and not 796 because of rounding off issue with avgColLen. avgColLen uses integers and not double. +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where state!='OH' +PREHOOK: type: QUERY +POSTHOOK: query: -- not equals comparison shouldn't affect number of rows. rawDataSize is 792 and not 796 because of rounding off issue with avgColLen. avgColLen uses integers and not double. +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where state!='OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (!= (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns 
state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where state<>'OH' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where state<>'OH' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<> (TOK_TABLE_OR_COL state) 'OH')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (state <> 'OH') + type: boolean + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where zip is null +PREHOOK: type: QUERY +POSTHOOK: query: -- nulls are treated as constant equality comparison +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where zip is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is null + type: boolean + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: 
COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where !(zip is not null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where !(zip is not null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
(TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is not null) + type: boolean + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where zip is not null +PREHOOK: type: QUERY +POSTHOOK: query: -- not nulls are treated as inverse of nulls +-- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where zip is not null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: 
loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: zip is not null + type: boolean + Statistics: + numRows: 7 dataSize: 693 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 693 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 693 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + 
totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where !(zip is null) +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 7 rawDataSize: 693 +explain extended select * from loc_orc where !(zip is null) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL zip)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not zip is null) + type: boolean + Statistics: + numRows: 7 dataSize: 693 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 693 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 693 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + 
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where !false +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT evaluation. true will pass all rows, false will not pass any rows +-- numRows: 8 rawDataSize: 792 +explain extended select * from loc_orc where !false +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! 
false)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not false) + type: boolean + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where !true +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (! true)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (not true) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc 
[loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where state='OH' or state='CA' +PREHOOK: type: QUERY +POSTHOOK: query: -- OR evaluation. 1 row for OH and 1 row for CA +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where state='OH' or state='CA' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL state) 'OH') (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((state = 'OH') or (state = 'CA')) + type: boolean + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where year=2001 and year is null +PREHOOK: type: QUERY +POSTHOOK: query: -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2 +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where year=2001 and year is null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and year is null) + type: boolean + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: 
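The plans above walk the filter cardinality rules this patch annotates: a constant predicate keeps everything (!false) or nothing (!true), an equality on a column keeps rows/NDV, OR sums its disjuncts' estimates, and AND applies each conjunct's rule to the running estimate in cascade (the "8/2 = 4/2 = 2" note). The following is a minimal Java sketch of that arithmetic only; the names are illustrative and the patch's actual rule classes are not shown in this part of the diff.

    import java.util.function.LongUnaryOperator;

    /** Illustrative selectivity arithmetic behind the filter estimates above. */
    public final class FilterEstimates {

      /** Equality predicate: rows / NDV(column), e.g. state = 'OH'. */
      static long equality(long rows, long ndv) {
        return rows / Math.max(ndv, 1L);
      }

      /** AND cascades: each conjunct's rule is applied to the running estimate. */
      static long and(long rows, LongUnaryOperator... conjuncts) {
        long out = rows;
        for (LongUnaryOperator c : conjuncts) {
          out = c.applyAsLong(out);
        }
        return out;
      }

      /** OR sums the disjuncts' independent estimates, capped at the input. */
      static long or(long rows, LongUnaryOperator... disjuncts) {
        long out = 0;
        for (LongUnaryOperator d : disjuncts) {
          out += d.applyAsLong(rows);
        }
        return Math.min(out, rows);
      }

      public static void main(String[] args) {
        long rows = 8;
        // state = 'OH' or state = 'CA': with NDV(state) from column stats
        // (any value in 5..8 reproduces the printed 1 row per disjunct), 1 + 1 = 2
        System.out.println(or(rows, r -> equality(r, 6), r -> equality(r, 6)));
        // year = 2001 and year is null, each conjunct halving: 8/2 = 4, 4/2 = 2
        System.out.println(and(rows, r -> r / 2, r -> r / 2));
      }
    }
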
+#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc where year=2001 and state='OH' and state='FL' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL state) 'OH')) (= (TOK_TABLE_OR_COL state) 'FL'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and (state = 'OH')) and (state = 'FL')) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 0 dataSize: 
0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 1 row and right will yield 1 row +-- numRows: 3 rawDataSize: 297 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. 
left expr will yield 1 row and right will yield 1 row +-- numRows: 3 rawDataSize: 297 +explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (and (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) and year is null) or (state = 'CA')) + type: boolean + Statistics: + numRows: 3 dataSize: 297 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 3 dataSize: 297 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 3 dataSize: 297 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + 
columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +PREHOOK: type: QUERY +POSTHOOK: query: -- AND and OR together. left expr will yield 8 rows and right will yield 1 row +-- numRows: 1 rawDataSize: 99 +explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA') +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (or (= (TOK_TABLE_OR_COL year) 2001) (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL year))) (= (TOK_TABLE_OR_COL state) 'CA'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (((year = 2001) or year is null) and (state = 'CA')) + type: boolean + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 99 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + 
base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- all inequality conditions rows/3 is the rules +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where locid < 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- all inequality conditions rows/3 is the rules +-- numRows: 2 rawDataSize: 198 +explain extended select * from loc_orc where locid < 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (< (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid < 30) + type: boolean + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid > 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid > 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid > 30) + type: boolean + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + 
expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid <= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid <= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc 
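The four range scans here (locid < 30, > 30, <= 30, >= 30) all print numRows: 2, the "rows/3 is the rule" heuristic named in the comment above: with no histogram, a single inequality is assumed to pass one third of its input, whatever the direction of the comparison. In the same illustrative style as the sketch earlier:

    /** Sketch of the inequality rule: without histograms, keep ~1/3 of the input. */
    static long inequality(long inputRows) {
      return inputRows / 3;   // 8 / 3 = 2 for every locid <, >, <=, >= 30 plan here
    }

For the earlier AND-and-OR case, the arithmetic that actually reaches the printed numRows: 3 is 2 rows from the cascaded left conjunction (8/2 = 4, 4/2 = 2) plus 1 row from state = 'CA'.
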
+ TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid <= 30) + type: boolean + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain extended select * from loc_orc where locid >= 30 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from loc_orc where locid >= 30 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (>= (TOK_TABLE_OR_COL locid) 30)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (locid >= 30) + type: boolean + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@loc_staging +PREHOOK: Output: default@loc_staging +POSTHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: drop table loc_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: drop table loc_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out new file mode 100644 index 0000000..4b67400 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -0,0 +1,1452 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side and reduce-side GBY : numRows: 2 +explain extended select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- only one distinct value in year column + 1 NULL value +-- map-side and reduce-side GBY : numRows: 2 +explain extended select year from loc_orc group by year +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT 
(TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL year))) (TOK_GROUPBY (TOK_TABLE_OR_COL year)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: year + type: int + outputColumnNames: year + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: year + type: int + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 4 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 4 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side and reduce-side GBY : numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side and reduce-side GBY : numRows: 4 +explain extended select state,locid from loc_orc group by state,locid +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + mode: hash + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + 
rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 
1584 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 16 dataSize: 1584 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid with rollup +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 
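The group-by plans follow a second family of rules. The printed numbers are consistent with: a plain GROUP BY estimating half its input, bounded by the product of the grouping keys' NDVs (which is what pins group by year at 2, one distinct year plus a NULL group); cube, rollup, and grouping sets scaling the map-side hash estimate by the number of grouping sets they expand to; and the reduce-side mergepartial halving the map-side figure again. The sketch below is inferred from the printed estimates rather than read off the patch's code, and every name in it is made up.

    /** Inferred from the printed estimates; not the patch's documented API. */
    final class GroupByEstimates {

      /** Plain GROUP BY: half the input, bounded by the product of key NDVs. */
      static long plain(long rows, long... keyNdvs) {
        long distinct = 1;
        for (long n : keyNdvs) {
          distinct *= n;
        }
        return Math.min(rows / 2, distinct);
      }

      /** Map-side hash GBY when cube/rollup/grouping sets expand the keys. */
      static long mapSide(long rows, int groupingSets) {
        return rows * groupingSets / 2;   // cube over 2 keys = 4 sets: 8 * 4 / 2 = 16
      }

      /** Reduce-side mergepartial halves the map-side estimate. */
      static long reduceSide(long mapRows) {
        return mapRows / 2;               // 16 -> 8, 12 -> 6, 4 -> 2
      }

      public static void main(String[] args) {
        System.out.println(plain(8, 2));     // group by year: NDV 1 + NULL group = 2
        System.out.println(mapSide(8, 4));   // with cube: 16
        System.out.println(mapSide(8, 3));   // with rollup: 12
        System.out.println(mapSide(8, 1));   // grouping sets((state)): 4
      }
    }
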
+explain extended select state,locid from loc_orc group by state,locid with rollup +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 12 dataSize: 1188 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 12 dataSize: 1188 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc 
[loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 594 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 540 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 540 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 4 reduce-side GBY numRows: 2 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 4 reduce-side GBY numRows: 2 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: 
string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 2 dataSize: 198 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 180 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 180 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] 
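
The expected counts in the grouping-set query comments above and below all follow one arithmetic pattern. Every variant starts from loc_orc's 8 rows; the map-side Group By Operator is annotated with (8 / 2) * numberOfGroupingSets rows (cube over two columns expands to 4 grouping sets, hence 16; rollup to 3, hence 12; a single explicit set to 1, hence 4), and the reduce-side mergepartial Group By halves the map-side estimate again. The sketch below reproduces that arithmetic; it is a minimal illustration of the halve-per-aggregation heuristic these golden files exhibit, and the class and method names are hypothetical, not the estimator API this patch adds.

```java
// Hypothetical sketch of the row-count pattern in the comments above.
public final class GroupByStatsSketch {

  // Map-side GBY: hash aggregation is assumed to halve the input, and
  // each grouping set then emits its own row per surviving group.
  static long mapSideRows(long inputRows, int numGroupingSets) {
    return (inputRows / 2) * numGroupingSets;
  }

  // Reduce-side GBY (mode: mergepartial): merging partials halves again.
  static long reduceSideRows(long mapRows) {
    return mapRows / 2;
  }

  public static void main(String[] args) {
    long rows = 8; // loc_orc numRows
    System.out.println(mapSideRows(rows, 4));                 // cube      -> 16
    System.out.println(reduceSideRows(mapSideRows(rows, 4))); //           -> 8
    System.out.println(mapSideRows(rows, 3));                 // rollup    -> 12
    System.out.println(mapSideRows(rows, 1));                 // ((state)) -> 4
  }
}
```

The dataSize annotations track the same counts times an average row width: 99 bytes while the state/locid/'0' grouping key is carried (16 * 99 = 1584, 4 * 99 = 396), dropping to 90 once the Select Operator projects the grouping-id column away (8 * 90 = 720, 6 * 90 = 540).
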
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: 
KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 4 dataSize: 360 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 12 reduce-side GBY numRows: 6 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 12 dataSize: 1188 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: 
string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 12 dataSize: 1188 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 6 dataSize: 594 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 540 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 540 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_GROUPING_SETS (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state) (TOK_TABLE_OR_COL locid)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL state)) (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL locid)) TOK_GROUPING_SETS_EXPRESSION))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: state, locid + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + bucketGroup: false + keys: + expr: state + type: string + expr: locid + type: int + expr: '0' + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 16 dataSize: 1584 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: string + Statistics: + numRows: 16 dataSize: 1584 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: 
default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: int + expr: KEY._col2 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: + numRows: 8 dataSize: 792 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 720 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_staging +POSTHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: drop table loc_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: drop table loc_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out new file mode 100644 index 0000000..47aba66 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -0,0 +1,792 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( 
+ lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists dept_staging ( + deptname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_staging ( + deptname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: create table if not exists dept_orc like dept_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists dept_orc like dept_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dept_orc +PREHOOK: query: alter table dept_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@dept_orc +PREHOOK: Output: default@dept_orc +POSTHOOK: query: alter table dept_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@dept_orc +POSTHOOK: Output: default@dept_orc +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@dept_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@dept_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: insert overwrite table dept_orc select * from dept_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_staging +PREHOOK: Output: default@dept_orc +POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_staging +POSTHOOK: Output: default@dept_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: 
emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- no statistics will be displayed for this case as column statistics for table dept_orc is not available yet +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- no statistics will be displayed for this case as column statistics for table dept_orc is not available yet +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. 
(TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: NONE + tag: 1 + value expressions: + expr: deptname + type: string + expr: deptid + type: int + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A 
masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dept_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. 
Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptname + type: string + expr: deptid + type: int + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +PREHOOK: type: QUERY +POSTHOOK: query: -- emp_orc numRows: 6 DV: 3, dept_orc numRows: 4 DV: 4. 
Output of join will yield 6 rows (6*4)/max(3,4) +explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME emp_orc) e) (TOK_TABREF (TOK_TABNAME dept_orc) d) (= (. (TOK_TABLE_OR_COL e) deptid) (. (TOK_TABLE_OR_COL d) deptid)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + d + TableScan + alias: d + Statistics: + numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 4 dataSize: 344 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 1 + value expressions: + expr: deptname + type: string + expr: deptid + type: int + e + TableScan + alias: e + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: deptid + type: int + sort order: + + Map-reduce partition columns: + expr: deptid + type: int + Statistics: + numRows: 6 dataSize: 560 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: 0 + value expressions: + expr: lastname + type: string + expr: deptid + type: int + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: dept_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns deptname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.dept_orc + numFiles 1 + numPartitions 0 + numRows 4 + rawDataSize 344 + serialization.ddl struct dept_orc { string deptname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 229 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.dept_orc + name: default.dept_orc +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 560 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /dept_orc [d] + /emp_orc [e] + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4, _col5 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col4 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 9 dataSize: 837 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table emp_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_staging +POSTHOOK: query: -- will invalidate session level column stats cache +drop table emp_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_staging +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: 
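emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ]

The three join explains above test the estimator end to end: before dept_orc is analyzed, its side of the plan is annotated colStatsState: NONE; after analyze table dept_orc compute statistics for columns, both sides are COMPLETE. The query comments state the cardinality rule outright: |R join S| = |R| * |S| / max(NDV(R.key), NDV(S.key)), here (6 * 4) / max(3, 4) = 6 for emp_orc joined to dept_orc on deptid. The annotate_stats_limit.q.out file that follows exercises the simpler LIMIT rule, where the annotated row count is capped at the smaller of the limit and the input's rows. A minimal sketch of both rules, with hypothetical names standing in for the patch's actual estimator:

```java
// Hypothetical sketch of the two estimates tested in these golden files.
public final class JoinAndLimitStatsSketch {

  // Classic join cardinality, the "(6*4)/max(3,4)" arithmetic from the
  // test comments: |R join S| = |R| * |S| / max(key NDV on either side).
  static long joinRows(long leftRows, long rightRows, long leftNdv, long rightNdv) {
    return (leftRows * rightRows) / Math.max(leftNdv, rightNdv);
  }

  // LIMIT: the annotated row count can never exceed the input's.
  static long limitRows(long inputRows, long limit) {
    return Math.min(inputRows, limit);
  }

  public static void main(String[] args) {
    // emp_orc: 6 rows, deptid NDV 3; dept_orc: 4 rows, deptid NDV 4
    System.out.println(joinRows(6, 4, 3, 4)); // 6
    // loc_orc has 8 rows (see the annotate_stats_limit.q.out comments)
    System.out.println(limitRows(8, 4));  // limit 4  -> 4
    System.out.println(limitRows(8, 16)); // limit 16 -> 8
    System.out.println(limitRows(8, 0));  // limit 0  -> 0
  }
}
```
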
+PREHOOK: query: drop table dept_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dept_staging +PREHOOK: Output: default@dept_staging +POSTHOOK: query: drop table dept_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dept_staging +POSTHOOK: Output: default@dept_staging +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: drop table emp_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: drop table emp_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: drop table dept_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dept_orc +PREHOOK: Output: default@dept_orc +POSTHOOK: query: drop table dept_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dept_orc +POSTHOOK: Output: default@dept_orc +POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out new file mode 100644 index 0000000..f1bcec7 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -0,0 +1,223 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter
table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 4 rawDataSize: 396 +explain extended select * from loc_orc limit 4 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT 
(TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 4))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 4 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Limit + Statistics: + numRows: 4 dataSize: 396 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +PREHOOK: type: QUERY +POSTHOOK: query: -- greater than the available number of rows +-- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc limit 16 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 16))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 16 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Limit + ListSink + + +PREHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 0 rawDataSize: 0 +explain extended select * from loc_orc limit 0 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 0))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 0 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: 
int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: NONE + Limit + Statistics: + numRows: 0 dataSize: 0 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out new file mode 100644 index 0000000..cdfd54c --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -0,0 +1,1852 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_orc ( + state string, + locid int, + zip bigint +) partitioned by(year int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: -- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: 
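The four limit plans above pin down a simple estimation rule: LIMIT 4 rewrites numRows: 8 / dataSize: 796 to 4 / 396, LIMIT 16 asks for more rows than exist and leaves the scan statistics untouched, and LIMIT 0 zeroes both. A minimal sketch of that rule as the numbers imply it — the class and method names are illustrative, not the patch's actual code:

// Minimal sketch of the limit rule the plans above imply; the class and
// method names are illustrative, not the patch's actual Java API.
public final class LimitStatsSketch {

  /** Simplified stand-in for the Statistics annotation on an operator. */
  static final class Stats {
    final long numRows;
    final long dataSize;
    Stats(long numRows, long dataSize) {
      this.numRows = numRows;
      this.dataSize = dataSize;
    }
    @Override
    public String toString() {
      return "numRows: " + numRows + " dataSize: " + dataSize;
    }
  }

  /**
   * A limit can only shrink the row count; when it does, the data size is
   * rescaled by the integer average row size. That integer division is why
   * 796 / 8 = 99 bytes per row gives 4 * 99 = 396 rather than 398.
   */
  static Stats applyLimit(Stats in, long limit) {
    if (limit >= in.numRows) {
      return in; // LIMIT exceeds available rows: statistics unchanged
    }
    long avgRowSize = in.numRows == 0 ? 0 : in.dataSize / in.numRows;
    return new Stats(limit, limit * avgRowSize);
  }

  public static void main(String[] args) {
    Stats scan = new Stats(8, 796);
    System.out.println(applyLimit(scan, 4));  // numRows: 4 dataSize: 396
    System.out.println(applyLimit(scan, 16)); // numRows: 8 dataSize: 796
    System.out.println(applyLimit(scan, 0));  // numRows: 0 dataSize: 0
  }
}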
loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 621 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 621 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level analyze statistics for specific partition +analyze table loc_orc partition(year=2001) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: query: -- partition level analyze statistics for specific partition +analyze table loc_orc partition(year=2001) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__')))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 277 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 277 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: 
loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + partition_columns year + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + 
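Before the Processor Tree below reports numRows: 7 dataSize: 621 basicStatsState: PARTIAL, it is worth spelling out the roll-up these partitioned plans imply: row counts are summed over analyzed partitions only, data size falls back to each partition's on-disk totalSize (344 + 277 = 621) while rawDataSize is 0, and the state stays PARTIAL until every selected partition has been analyzed, after which the same scan reports numRows: 8 and COMPLETE. A hedged sketch of that merge, with hypothetical names rather than the patch's API:

// Sketch of the per-partition roll-up these plans imply; all names are
// hypothetical, not the patch's API. Row counts are summed over analyzed
// partitions, data size falls back to on-disk totalSize when rawDataSize
// is absent (344 + 277 = 621), and the state is COMPLETE only when every
// selected partition carries a row count.
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public final class PartitionStatsSketch {

  enum State { NONE, PARTIAL, COMPLETE }

  /** Basic statistics as stored per partition in the metastore. */
  static final class PartStats {
    final Long numRows;     // null when the partition was never analyzed
    final long rawDataSize; // 0 here: this ORC version does not record it
    final long totalSize;   // on-disk file size, always available
    PartStats(Long numRows, long rawDataSize, long totalSize) {
      this.numRows = numRows;
      this.rawDataSize = rawDataSize;
      this.totalSize = totalSize;
    }
  }

  static void merge(List<PartStats> parts) {
    long rows = 0;
    long size = 0;
    int analyzed = 0;
    for (PartStats p : parts) {
      if (p.numRows != null) {
        rows += p.numRows;
        analyzed++;
      }
      size += p.rawDataSize > 0 ? p.rawDataSize : p.totalSize; // file-size fallback
    }
    // File sizes are always known, so missing row counts degrade the state
    // to PARTIAL rather than NONE; NONE is reserved for an empty selection
    // (e.g. when every partition is pruned away).
    State state = parts.isEmpty() ? State.NONE
        : analyzed == parts.size() ? State.COMPLETE : State.PARTIAL;
    System.out.println("numRows: " + rows + " dataSize: " + size
        + " basicStatsState: " + state);
  }

  public static void main(String[] args) {
    PartStats y2001 = new PartStats(7L, 0, 344);            // analyzed
    PartStats hiveDefault = new PartStats(null, 0, 277);    // not analyzed yet
    merge(Arrays.asList(y2001, hiveDefault));               // 7 / 621 / PARTIAL
    merge(Arrays.asList(y2001, new PartStats(1L, 0, 277))); // 8 / 621 / COMPLETE
    merge(Collections.<PartStats>emptyList());              // 0 / 0 / NONE
  }
}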
Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 621 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 621 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 344 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: 
state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 7 dataSize: 344 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: query: -- partition level analyze statistics for all partitions +analyze table loc_orc partition(year) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Input: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc@year=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 
'__HIVE_DEFAULT_PARTITION__')))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 1 dataSize: 277 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 1 dataSize: 277 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 or year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc 
+ numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +PREHOOK: type: QUERY +POSTHOOK: query: -- both partitions will be pruned +-- basicStatState: NONE level: PARTITION colStatState: NONE +explain extended select * from loc_orc where year=2001 and year='__HIVE_DEFAULT_PARTITION__' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL year) 2001) (= (TOK_TABLE_OR_COL year) '__HIVE_DEFAULT_PARTITION__'))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((year = 2001) and (year = '__HIVE_DEFAULT_PARTITION__')) + type: boolean + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +PREHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: 
query: -- partition level partial column statistics +analyze table loc_orc partition(year=2001) compute statistics for columns state,locid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +POSTHOOK: Input: default@loc_orc@year=2001 +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select zip from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select zip from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: zip + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select 
state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string 
state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state,locid from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. 
Hence colStatState reports PARTIAL +-- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select state,locid from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + 
columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: COMPLETE +explain extended select state,locid from loc_orc where year=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR 
(TOK_TABLE_OR_COL locid))) (TOK_WHERE (= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 7 dataSize: 344 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 7 dataSize: 630 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=2001 + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=2001 [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: NONE +explain extended select state,locid from loc_orc where year!=2001 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: 
loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)) (TOK_SELEXPR (TOK_TABLE_OR_COL locid))) (TOK_WHERE (!= (TOK_TABLE_OR_COL year) 2001)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 1 dataSize: 277 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (year <> 2001) + type: boolean + Statistics: + numRows: 1 dataSize: 277 basicStatsState: COMPLETE colStatsState: NONE + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 1 dataSize: 277 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 277 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: year=__HIVE_DEFAULT_PARTITION__ + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string 
state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc/year=__HIVE_DEFAULT_PARTITION__ [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: PARTITION colStatState: PARTIAL +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year 2001 + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numRows 7 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 344 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Partition + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + partition values: + year __HIVE_DEFAULT_PARTITION__ + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern 
was here #### + name default.loc_orc + numFiles 1 + numRows 1 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 277 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip + columns.types string:int:bigint +#### A masked pattern was here #### + name default.loc_orc + numFiles 2 + numPartitions 2 + numRows 8 + partition_columns year + rawDataSize 0 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 621 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 621 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_staging +POSTHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_staging +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: drop table loc_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: drop table loc_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=2001).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).locid 
SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc PARTITION(year=__HIVE_DEFAULT_PARTITION__).zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_ptf.q.out ql/src/test/results/clientpositive/annotate_stats_ptf.q.out new file mode 100644 index 0000000..e83132b --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_ptf.q.out @@ -0,0 +1,795 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL locid))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: locid + type: int + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + PTF Operator 
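Each operator in these plans, like the PTF Operator above, is annotated with a Statistics tuple of numRows, dataSize, basicStatsState, and colStatsState. The state values in the partitioned-table plans earlier follow from a merge across partitions: year=2001 has analyzed column statistics while __HIVE_DEFAULT_PARTITION__ does not, so scanning both reports colStatsState PARTIAL, pruning to year=2001 reports COMPLETE, and pruning to the default partition alone reports NONE; the basic stats simply add up (numRows 7 + 1 = 8, totalSize 344 + 277 = 621). A minimal sketch of that merge, using hypothetical names rather than Hive's actual classes:

```java
import java.util.List;

// Sketch only: OperatorStats, State, and mergePartitions are hypothetical
// stand-ins, not Hive's real statistics classes.
final class OperatorStats {
  enum State { NONE, PARTIAL, COMPLETE }

  long numRows;                          // e.g. 7 + 1 = 8 across both partitions
  long dataSize;                         // e.g. 344 + 277 = 621 bytes
  State basicStatsState = State.NONE;
  State colStatsState = State.NONE;

  // Basic stats add up; a column-stats state is NONE if no partition has any
  // column stats, COMPLETE only if every partition does, PARTIAL otherwise.
  static OperatorStats mergePartitions(List<OperatorStats> partitions) {
    OperatorStats merged = new OperatorStats();
    boolean allComplete = true;
    boolean anyPresent = false;
    for (OperatorStats p : partitions) {
      merged.numRows += p.numRows;
      merged.dataSize += p.dataSize;
      allComplete &= (p.colStatsState == State.COMPLETE);
      anyPresent |= (p.colStatsState != State.NONE);
    }
    merged.basicStatsState = State.COMPLETE;  // assumed: basic stats known, as here
    merged.colStatsState = !anyPresent ? State.NONE
                         : allComplete ? State.COMPLETE
                                       : State.PARTIAL;
    return merged;
  }
}
```

The PTF Operator's own annotated Statistics continue immediately below.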
+ Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid,zip) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by locid,zip) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL locid) (TOK_TABLE_OR_COL zip))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: locid + type: int + expr: zip + type: bigint + expr: locid + type: int + expr: zip + type: bigint + sort order: ++++ + Map-reduce partition columns: + expr: locid + type: int + expr: zip + type: bigint + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc 
{ string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + PTF Operator + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL zip)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL locid)))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map 
Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: zip + type: bigint + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: zip + type: bigint + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + PTF Operator + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between unbounded preceding and current row) from loc_orc 
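This query and the remaining window-frame variants in the file produce identical annotations: a windowing invocation neither filters nor multiplies rows, so numRows stays at the parent's 8 regardless of the partition-by, order-by, or rows-between clauses, while dataSize is re-derived from the columns that survive the projection (here zip and the count, both bigints), consistent with the drop from 796 at the TableScan to 120 after the PTF. A sketch of that shape of computation, with hypothetical names; the exact per-type byte widths Hive assumes come from its own data model and are not reproduced here:

```java
// Sketch only: hypothetical helper, not Hive's actual annotation rule.
final class PtfStatsSketch {
  /**
   * Returns {numRows, dataSize} for a windowing PTF output: cardinality is
   * preserved from the parent, and the data size follows the average
   * sizes of the projected output columns (taken from column statistics).
   */
  static long[] annotate(long parentNumRows, long[] outputAvgColSizes) {
    long rowWidth = 0;
    for (long size : outputAvgColSizes) {
      rowWidth += size;
    }
    return new long[] { parentNumRows, parentNumRows * rowWidth };
  }
}
```

The plan for this query, identical in its Statistics lines to the previous ones, continues below.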
+PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between unbounded preceding and current row) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL zip)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL locid)))) (TOK_WINDOWRANGE (preceding unbounded) current))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: zip + type: bigint + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: zip + type: bigint + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + PTF Operator + Statistics: + numRows: 8 
dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between 3 preceding and current row) from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 120 +explain extended select zip, count(state) over (partition by zip order by locid rows between 3 preceding and current row) from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL zip)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_TABLE_OR_COL state) (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL zip)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL locid)))) (TOK_WINDOWRANGE (preceding 3) current))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Reduce Output Operator + key expressions: + expr: zip + type: bigint + expr: locid + type: int + sort order: ++ + Map-reduce partition columns: + expr: zip + type: bigint + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + 
numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + PTF Operator + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _wcol0 + type: bigint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 120 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types bigint:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_staging +POSTHOOK: query: -- will invalidate session level column stats cache +drop table loc_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: drop table loc_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: drop table loc_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, 
comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out new file mode 100644 index 0000000..091de77 --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -0,0 +1,3154 @@ +PREHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists alltypes ( + bo1 boolean, + ti1 tinyint, + si1 smallint, + i1 int, + bi1 bigint, + f1 float, + d1 double, + de1 decimal, + ts1 timestamp, + da1 timestamp, + s1 string, + m1 map, + l1 array, + st1 struct +) row format delimited fields terminated by '|' +collection items terminated by ',' +map keys terminated by ':' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes +PREHOOK: query: create table alltypes_orc like alltypes +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alltypes_orc like alltypes +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: alter table alltypes_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: alter table alltypes_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +PREHOOK: query: load data local inpath '../data/files/alltypes.txt' overwrite into table alltypes +PREHOOK: type: LOAD +PREHOOK: Output: default@alltypes +POSTHOOK: query: load data local inpath '../data/files/alltypes.txt' overwrite into table alltypes +POSTHOOK: type: LOAD +POSTHOOK: Output: default@alltypes +PREHOOK: query: insert overwrite table alltypes_orc select * from alltypes +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: insert overwrite table alltypes_orc select * from alltypes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes +POSTHOOK: Output: default@alltypes_orc +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: 
Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- basicStatState: PARTIAL level: TABLE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: PARTIAL level: TABLE colStatState: NONE numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: 
string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: query: -- statistics for complex types are not supported yet +analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1,s1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypes_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1514 +explain extended select * from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select bo1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE 
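
Between this plan and the next: ANALYZE ... FOR COLUMNS covered only the primitive columns, so select * (which also touches m1, l1, st1) reports colStatsState PARTIAL, while the select bo1 plan that follows reports COMPLETE and the later select m1 plan reports NONE. That reads like a meet over per-column states; a sketch under that assumption (merge and State are illustrative names, not Hive's API):

    public class StateMergeSketch {
      enum State { NONE, PARTIAL, COMPLETE }

      // Meet over per-column stats states: COMPLETE only if every column is
      // covered, NONE only if no column is, PARTIAL otherwise.
      static State merge(State... states) {
        boolean allComplete = true, allNone = true;
        for (State s : states) {
          if (s != State.COMPLETE) allComplete = false;
          if (s != State.NONE) allNone = false;
        }
        return allComplete ? State.COMPLETE : (allNone ? State.NONE : State.PARTIAL);
      }

      public static void main(String[] args) {
        // select * over analyzed + unanalyzed columns -> PARTIAL
        System.out.println(merge(State.COMPLETE, State.NONE));
        // select bo1 (analyzed only) -> COMPLETE
        System.out.println(merge(State.COMPLETE));
        // select m1 (complex type, never analyzed) -> NONE
        System.out.println(merge(State.NONE));
      }
    }
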
[(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types boolean + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- col alias renaming +-- numRows: 2 rawDataSize: 8 +explain extended select i1 as int1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL i1) int1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: i1 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A 
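
In the select bo1 and select i1 as int1 plans above, the Select operator keeps numRows but recomputes dataSize from the projected columns only (2 rows * 4 bytes = 8 in both cases; the alias rename changes nothing). A sketch that reproduces those numbers; the per-type sizes here are assumptions tuned to this output, whereas Hive's real sizing goes through its Java data model and, for variable-width types, average lengths from column statistics:

    import java.util.Arrays;
    import java.util.List;

    public class SelectSizeSketch {
      // Assumed fixed per-row sizes; chosen to match the plans above.
      static long avgSize(String type) {
        switch (type) {
          case "boolean": case "tinyint": case "smallint": case "int": case "float":
            return 4;
          case "bigint": case "double":
            return 8;
          default:
            throw new IllegalArgumentException("needs column stats: " + type);
        }
      }

      static long dataSize(long numRows, List<String> projectedTypes) {
        return numRows * projectedTypes.stream().mapToLong(SelectSizeSketch::avgSize).sum();
      }

      public static void main(String[] args) {
        // select bo1 ...        -> numRows: 2 dataSize: 8
        System.out.println(dataSize(2, Arrays.asList("boolean")));
        // select i1 as int1 ... -> numRows: 2 dataSize: 8
        System.out.println(dataSize(2, Arrays.asList("int")));
      }
    }
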
masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 172 +explain extended select s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 172 +explain extended select s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 
SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: s1 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 174 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, 
decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- column statistics for complex types unsupported and so statistics will not be updated +-- numRows: 2 rawDataSize: 1514 +explain extended select m1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL m1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: m1 + type: map + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
properties: + columns _col0 + columns.types map + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 244 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 244 +explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE 
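
The select m1 plan just above cannot shrink dataSize, because complex types carry no column statistics; the Select keeps the scan's full 1514 bytes and colStatsState stays NONE. A conservative fallback consistent with that behavior (all names illustrative, not Hive's actual implementation):

    import java.util.HashMap;
    import java.util.Map;

    public class ComplexTypeFallbackSketch {
      static long selectDataSize(long parentDataSize, long numRows,
                                 String[] projectedTypes, Map<String, Long> avgSizeByType) {
        long perRow = 0;
        for (String t : projectedTypes) {
          Long avg = avgSizeByType.get(t);
          if (avg == null) {
            // No statistics for this column (map/array/struct here):
            // carry the parent's dataSize forward instead of shrinking it.
            return parentDataSize;
          }
          perRow += avg;
        }
        return numRows * perRow;
      }

      public static void main(String[] args) {
        Map<String, Long> avgSizeByType = new HashMap<>();
        avgSizeByType.put("boolean", 4L); // assumed size, matching the plans
        // select m1 over a 1514-byte scan keeps 1514:
        System.out.println(selectDataSize(1514, 2, new String[]{"map"}, avgSizeByType));
        // select bo1 shrinks to 2 * 4 = 8:
        System.out.println(selectDataSize(1514, 2, new String[]{"boolean"}, avgSizeByType));
      }
    }
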
[(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL bo1)) (TOK_SELEXPR (TOK_TABLE_OR_COL ti1)) (TOK_SELEXPR (TOK_TABLE_OR_COL si1)) (TOK_SELEXPR (TOK_TABLE_OR_COL i1)) (TOK_SELEXPR (TOK_TABLE_OR_COL bi1)) (TOK_SELEXPR (TOK_TABLE_OR_COL f1)) (TOK_SELEXPR (TOK_TABLE_OR_COL d1)) (TOK_SELEXPR (TOK_TABLE_OR_COL s1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: s1 + type: string + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: + numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 246 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types boolean:tinyint:smallint:int:bigint:float:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + 
serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 0 +explain extended select null from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_NULL)))) + +STAGE DEPENDENCIES: + Stage-1 is a root 
stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: null + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 8 +explain extended select 11 from alltypes_orc +POSTHOOK: type: 
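
The select null plan above sizes a null literal at zero bytes (dataSize 0), while constant expressions are sized per row by their type: select 11 costs 2 * 4 = 8, and the bigint literal 11L in the following plan costs 2 * 8 = 16. In sketch form (constantSize is an illustrative name, with the same assumed per-type sizes as before):

    public class ConstantSizeSketch {
      // A null constant contributes nothing; any other constant contributes
      // its type's assumed per-row width times the row count.
      static long constantSize(Object value, long bytesPerRow, long numRows) {
        return value == null ? 0 : numRows * bytesPerRow;
      }

      public static void main(String[] args) {
        System.out.println(constantSize(null, 4, 2)); // select null -> 0
        System.out.println(constantSize(11, 4, 2));   // select 11   -> 8
        System.out.println(constantSize(11L, 8, 2));  // select 11L  -> 16
      }
    }
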
QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11 + type: int + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 8 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A 
masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11L from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) 
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11L)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc 
+PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select 11.0 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 11.0)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 11.0 + type: double + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + 
columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 178 +explain extended select "hello" from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR "hello")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: 'hello' + type: string + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 346 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 346 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + 
/alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 96 +explain extended select unbase64("0xe23") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION unbase64 "0xe23"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: unbase64('0xe23') + type: binary + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 176 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 176 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types binary + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc 
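
The string-constant plan above (dataSize 346 for two 'hello' rows) and the unbase64 plan alongside it (dataSize 176) show that variable-width results are not sized as payload bytes times rows; the estimates evidently include per-object overhead on top of an average length. The exact constants are Hive-internal, so the sketch below only shows the assumed shape of the computation; overhead and widthFactor are made-up parameters, not values from this patch:

    public class VarWidthSizeSketch {
      // Assumed model: per-row cost = fixed object overhead plus a factor
      // applied to the average payload length from column statistics.
      static long varWidthSize(long numRows, long avgLen, long overhead, long widthFactor) {
        return numRows * (overhead + widthFactor * avgLen);
      }

      public static void main(String[] args) {
        // Illustrative only; does not reproduce the 346/176 in the plans.
        System.out.println(varWidthSize(2, 5, 64, 2));
      }
    }
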
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 16 +explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, 
type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TINYINT "1")) (TOK_SELEXPR (TOK_FUNCTION TOK_SMALLINT "20"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: UDFToByte('1') + type: tinyint + expr: UDFToShort('20') + type: smallint + outputColumnNames: _col0, _col1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 16 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types tinyint:smallint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct 
alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 80 +explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_TIMESTAMP "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS TIMESTAMP) + type: timestamp + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 80 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types timestamp + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, 
comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DATE "1970-12-31 15:59:58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '1970-12-31 15:59:58.174' AS DATE) + type: date + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types date + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output 
format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 224 +explain extended select cast("58.174" as DECIMAL) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION TOK_DECIMAL "58.174"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: CAST( '58.174' AS decimal(10,0)) + type: decimal(10,0) + outputColumnNames: _col0 + 
Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 0 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types decimal(10,0) + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select array(1,2,3) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, 
type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION array 1 2 3))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types array + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, 
timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1508 +explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION str_to_map "a=1 b=2 c=3" " " "="))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + 
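The expected rawDataSize figures in the test comments above follow a flat numRows x per-type-width model: 2 rows yield 96 bytes for a binary projection (48 per value), 16 for a tinyint plus a smallint (4 each, assuming an even split), 80 for a timestamp (40 per value), and 112 for a date (56 per value); the DECIMAL cast is the outlier, where the plan reports dataSize: 0 despite the comment's expected 224, suggesting decimal widths are not counted by this estimate. What follows is a minimal, self-contained sketch of that arithmetic only; the class, method, and width table are hypothetical, with constants inferred from the golden numbers in this file rather than taken from Hive's actual StatsUtils.

    import java.util.HashMap;
    import java.util.Map;

    /**
     * Illustrative only: reproduces the numRows * per-type-width arithmetic
     * behind the expected dataSize values above. Widths are inferred from
     * this golden file, not from Hive's real size model.
     */
    public class DataSizeSketch {
        private static final Map<String, Integer> WIDTH = new HashMap<>();
        static {
            WIDTH.put("tinyint", 4);    // 2 rows, tinyint + smallint -> 16
            WIDTH.put("smallint", 4);
            WIDTH.put("timestamp", 40); // 2 rows -> 80
            WIDTH.put("date", 56);      // 2 rows -> 112
            WIDTH.put("binary", 48);    // 2 rows -> 96
            WIDTH.put("decimal", 0);    // plan reports dataSize 0 for the DECIMAL cast
        }

        /** dataSize = numRows * (sum of the projected columns' estimated widths). */
        static long dataSize(long numRows, String... projectedTypes) {
            long rowWidth = 0;
            for (String type : projectedTypes) {
                rowWidth += WIDTH.getOrDefault(type, 0);
            }
            return numRows * rowWidth;
        }

        public static void main(String[] args) {
            System.out.println(dataSize(2, "tinyint", "smallint")); // 16
            System.out.println(dataSize(2, "timestamp"));           // 80
            System.out.println(dataSize(2, "date"));                // 112
            System.out.println(dataSize(2, "binary"));              // 96
        }
    }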
+STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: str_to_map('a=1 b=2 c=3',' ','=') + type: map + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1508 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types map + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 112 +explain extended select 
NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION NAMED_STRUCT "a" 11 "b" 11))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: named_struct('a',11,'b',11) + type: struct + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 112 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + 
columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 250 +explain extended select CREATE_UNION(0, "hello") from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: 
Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION CREATE_UNION 0 "hello"))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: create_union(0,'hello') + type: uniontype + outputColumnNames: _col0 + Statistics: + numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 250 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types uniontype + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + 
name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(*) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(*) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + aggregations: + expr: count() + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(1) from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- COUNT(1) is projected as new column. 
It is not projected as GenericUDF and so datasize estimate will be based on number of rows +-- numRows: 1 rawDataSize: 757 +explain extended select count(1) from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: COMPLETE + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + Reduce Output Operator + sort order: + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + tag: -1 + value expressions: + expr: _col0 + type: bigint + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { 
bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 1 dataSize: 757 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 2 rawDataSize: 1522 +explain extended select *,11 from alltypes_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 2 rawDataSize: 1522 +explain extended select *,11 from alltypes_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] 
+POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME alltypes_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR 11)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + alltypes_orc + TableScan + alias: alltypes_orc + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: bo1 + type: boolean + expr: ti1 + type: tinyint + expr: si1 + type: smallint + expr: i1 + type: int + expr: bi1 + type: bigint + expr: f1 + type: float + expr: d1 + type: double + expr: de1 + type: decimal(10,0) + expr: ts1 + type: timestamp + expr: da1 + type: timestamp + expr: s1 + type: string + expr: m1 + type: map + expr: l1 + type: array + expr: st1 + type: struct + expr: 11 + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 2 dataSize: 1514 basicStatsState: COMPLETE colStatsState: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: alltypes_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### 
A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + colelction.delim , + columns bo1,ti1,si1,i1,bi1,f1,d1,de1,ts1,da1,s1,m1,l1,st1 + columns.types boolean:tinyint:smallint:int:bigint:float:double:decimal(10,0):timestamp:timestamp:string:map:array:struct + field.delim | +#### A masked pattern was here #### + mapkey.delim : + name default.alltypes_orc + numFiles 1 + numPartitions 0 + numRows 2 + rawDataSize 1514 + serialization.ddl struct alltypes_orc { bool bo1, byte ti1, i16 si1, i32 i1, i64 bi1, float f1, double d1, decimal(10,0) de1, timestamp ts1, timestamp da1, string s1, map m1, list l1, struct st1} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 1194 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.alltypes_orc + name: default.alltypes_orc + Truncated Path -> Alias: + /alltypes_orc [alltypes_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table alltypes +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes +PREHOOK: Output: default@alltypes +POSTHOOK: query: -- will invalidate session level column stats cache +drop table alltypes +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypes +POSTHOOK: Output: default@alltypes +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE 
[(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] +PREHOOK: query: drop table alltypes_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc +POSTHOOK: query: drop table alltypes_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc +POSTHOOK: Lineage: alltypes_orc.bi1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bi1, type:bigint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.bo1 SIMPLE [(alltypes)alltypes.FieldSchema(name:bo1, type:boolean, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.d1 SIMPLE [(alltypes)alltypes.FieldSchema(name:d1, type:double, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.da1 SIMPLE [(alltypes)alltypes.FieldSchema(name:da1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.de1 SIMPLE [(alltypes)alltypes.FieldSchema(name:de1, type:decimal(10,0), comment:null), ] +POSTHOOK: Lineage: alltypes_orc.f1 SIMPLE [(alltypes)alltypes.FieldSchema(name:f1, type:float, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.i1 SIMPLE [(alltypes)alltypes.FieldSchema(name:i1, type:int, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.l1 SIMPLE [(alltypes)alltypes.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.m1 SIMPLE [(alltypes)alltypes.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.s1 SIMPLE [(alltypes)alltypes.FieldSchema(name:s1, type:string, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.si1 SIMPLE [(alltypes)alltypes.FieldSchema(name:si1, type:smallint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.st1 SIMPLE [(alltypes)alltypes.FieldSchema(name:st1, type:struct, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ti1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ti1, type:tinyint, comment:null), ] +POSTHOOK: Lineage: alltypes_orc.ts1 SIMPLE [(alltypes)alltypes.FieldSchema(name:ts1, type:timestamp, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out new file mode 100644 index 0000000..7f9e0cb --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -0,0 +1,391 @@ +PREHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_staging ( + lastname string, + deptid int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_staging +PREHOOK: query: create table if not exists emp_orc like emp_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists emp_orc like emp_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@emp_orc +PREHOOK: query: alter table emp_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: alter table emp_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +PREHOOK: query: -- basicStatState: NONE level: TABLE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: NONE level: TABLE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM 
(TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 0 basicStatsState: NONE colStatsState: NONE + ListSink + + +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@emp_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@emp_staging +PREHOOK: query: insert overwrite table emp_orc select * from emp_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_orc +POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- stats are disabled. basic stats will report the file size but not raw data size. 
so initial statistics will be PARTIAL + +-- basicStatState: PARTIAL level: TABLE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 0 dataSize: 300 basicStatsState: PARTIAL colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 0 dataSize: 300 basicStatsState: PARTIAL colStatsState: NONE + ListSink + + +PREHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: -- table level analyze statistics +analyze table emp_orc compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: NONE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: NONE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: NONE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: NONE + ListSink + + +PREHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level partial statistics +analyze table emp_orc compute statistics for columns deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] 
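
[Editor's note] The PARTIAL column-stats state in the plans that follow falls out of a simple rule: a node's colStatsState is COMPLETE only if every column it references has metastore statistics, NONE if none do, and PARTIAL otherwise. Below is a minimal, hypothetical sketch of that rule; the ColStatsState enum and deriveState helper are illustrative assumptions and do not mirror the actual org.apache.hadoop.hive.ql.plan.Statistics API added by this patch.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Hypothetical sketch: derive an overall column-stats state from the
    // columns a plan node needs vs. the columns that have metastore stats.
    public class ColStatsStateDemo {
      enum ColStatsState { NONE, PARTIAL, COMPLETE }

      static ColStatsState deriveState(List<String> neededCols, Set<String> colsWithStats) {
        int found = 0;
        for (String col : neededCols) {
          if (colsWithStats.contains(col)) {
            found++;
          }
        }
        if (found == 0) {
          return ColStatsState.NONE;
        }
        return found == neededCols.size() ? ColStatsState.COMPLETE : ColStatsState.PARTIAL;
      }

      public static void main(String[] args) {
        // Only deptid was analyzed, as in the test above.
        Set<String> analyzed = new HashSet<>(Arrays.asList("deptid"));

        // select * from emp_orc needs lastname and deptid -> PARTIAL
        System.out.println(deriveState(Arrays.asList("lastname", "deptid"), analyzed));

        // select deptid from emp_orc needs only deptid -> COMPLETE
        System.out.println(deriveState(Arrays.asList("deptid"), analyzed));
      }
    }

Under such a rule, select deptid from emp_orc can report colStatsState: COMPLETE while select * on the same table stays PARTIAL, which is exactly the contrast the next two explain plans in this golden file demonstrate.
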
+PREHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: PARTIAL +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: PARTIAL +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: PARTIAL + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: PARTIAL + ListSink + + +PREHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select deptid from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- all selected columns have statistics +-- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select deptid from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL deptid))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + emp_orc + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: deptid + type: int + outputColumnNames: _col0 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 6 dataSize: 20 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: emp_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns 
lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns lastname,deptid + columns.types string:int + field.delim | +#### A masked pattern was here #### + name default.emp_orc + numFiles 1 + numPartitions 0 + numRows 6 + rawDataSize 0 + serialization.ddl struct emp_orc { string lastname, i32 deptid} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 300 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.emp_orc + name: default.emp_orc + Truncated Path -> Alias: + /emp_orc [emp_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +PREHOOK: type: QUERY +PREHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: query: -- column level complete statistics +analyze table emp_orc compute statistics for columns lastname,deptid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select * from emp_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- basicStatState: COMPLETE level: TABLE colStatState: COMPLETE +explain extended select * from emp_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME emp_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: emp_orc + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: lastname + type: string + expr: deptid + type: int + outputColumnNames: _col0, _col1 + Statistics: + numRows: 6 dataSize: 300 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- will invalidate session level column stats cache +drop table emp_staging +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@emp_staging +PREHOOK: Output: default@emp_staging +POSTHOOK: query: -- will invalidate session level column stats cache +drop table emp_staging +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@emp_staging +POSTHOOK: Output: default@emp_staging +POSTHOOK: Lineage: emp_orc.deptid SIMPLE 
[(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] +PREHOOK: query: drop table emp_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc +POSTHOOK: query: drop table emp_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc +POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] +POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out new file mode 100644 index 0000000..6864cbe --- /dev/null +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -0,0 +1,1201 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: -- numRows: 8 rawDataSize: 680 +explain extended select state from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 680 +explain extended select state from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types 
string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1360 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1360 +explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: 
COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + outputColumnNames: _col0 + Statistics: + numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 8 rawDataSize: 796 +explain extended select * from loc_orc +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + ListSink + + +PREHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +PREHOOK: type: QUERY +POSTHOOK: query: -- numRows: 16 rawDataSize: 1592 +explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp +POSTHOOK: type: QUERY +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))) tmp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + null-subquery1:tmp-subquery1:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + 
columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + null-subquery2:tmp-subquery2:loc_orc + TableScan + alias: loc_orc + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + GatherStats: false + Select Operator + expressions: + expr: state + type: string + expr: locid + type: int + expr: zip + type: bigint + expr: year + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE + Union + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: int + expr: _col2 + type: bigint + expr: _col3 + type: int + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: + numRows: 16 dataSize: 1592 basicStatsState: COMPLETE colStatsState: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:int:bigint:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: loc_orc + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + properties: + bucket_count -1 + columns state,locid,zip,year + columns.types string:int:bigint:int + field.delim | +#### A masked pattern was here #### + name default.loc_orc + numFiles 1 + numPartitions 0 + numRows 8 + rawDataSize 796 + serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} + serialization.format | + serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde + totalSize 417 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.loc_orc + name: default.loc_orc + Truncated Path -> Alias: + /loc_orc [null-subquery1:tmp-subquery1:loc_orc, null-subquery2:tmp-subquery2:loc_orc] + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + + +PREHOOK: query: create database test +PREHOOK: type: CREATEDATABASE +POSTHOOK: query: create database test +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: use test +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: use test +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: test@loc_orc +PREHOOK: Output: test@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: test@loc_orc +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE 
[(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +PREHOOK: Output: test@loc_staging +POSTHOOK: query: load data local inpath '../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +POSTHOOK: Output: test@loc_staging +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +PREHOOK: Output: test@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +POSTHOOK: Output: test@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: test@loc_staging +#### A masked pattern was here #### +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: 
type: QUERY
+PREHOOK: Input: test@loc_orc
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year
+POSTHOOK: type: QUERY
+POSTHOOK: Input: test@loc_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: -- there should be 2 entries for DB statistics. Since there are 2 different DBs statistics can't be merged
+explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp
+PREHOOK: type: QUERY
+POSTHOOK: query: -- there should be 2 entries for DB statistics. Since there are 2 different DBs statistics can't be merged
+explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME default loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:temp-subquery1:loc_orc
+          TableScan
+            alias: loc_orc
+            Statistics:
+                numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+        null-subquery2:temp-subquery2:loc_orc
+          TableScan
+            alias: loc_orc
+            Statistics:
+                numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 16 dataSize: 1376 basicStatsState: COMPLETE colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+        Partition
+          base file name: loc_orc
+          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+          properties:
+            bucket_count -1
+            columns state,locid,zip,year
+            columns.types string:int:bigint:int
+            field.delim |
+#### A masked pattern was here ####
+            name default.loc_orc
+            numFiles 1
+            numPartitions 0
+            numRows 8
+            rawDataSize 796
+            serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+            serialization.format |
+            serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            totalSize 417
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name default.loc_orc
+              numFiles 1
+              numPartitions 0
+              numRows 8
+              rawDataSize 796
+              serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 417
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: default.loc_orc
+          name: default.loc_orc
+#### A masked pattern was here ####
+        Partition
+          base file name: loc_orc
+          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+          properties:
+            bucket_count -1
+            columns state,locid,zip,year
+            columns.types string:int:bigint:int
+            field.delim |
+#### A masked pattern was here ####
+            name test.loc_orc
+            numFiles 1
+            numPartitions 0
+            numRows 8
+            rawDataSize 796
+            serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+            serialization.format |
+            serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            totalSize 417
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name test.loc_orc
+              numFiles 1
+              numPartitions 0
+              numRows 8
+              rawDataSize 796
+              serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 417
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: test.loc_orc
+          name: test.loc_orc
+      Truncated Path -> Alias:
+        /loc_orc [null-subquery1:temp-subquery1:loc_orc]
+        /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- there should be 2 entries for Table statistics. Since there are 2 different tables statistics can't be merged
+explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp
+PREHOOK: type: QUERY
+POSTHOOK: query: -- there should be 2 entries for Table statistics. Since there are 2 different tables statistics can't be merged
+explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_staging))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME test loc_orc))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL state)))))) temp)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        null-subquery1:temp-subquery1:loc_staging
+          TableScan
+            alias: loc_staging
+            Statistics:
+                numRows: 0 dataSize: 125 basicStatsState: PARTIAL colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 0 dataSize: 125 basicStatsState: PARTIAL colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 8 dataSize: 813 basicStatsState: PARTIAL colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 8 dataSize: 813 basicStatsState: PARTIAL colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 8 dataSize: 813 basicStatsState: PARTIAL colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+        null-subquery2:temp-subquery2:loc_orc
+          TableScan
+            alias: loc_orc
+            Statistics:
+                numRows: 8 dataSize: 796 basicStatsState: COMPLETE colStatsState: COMPLETE
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: state
+                    type: string
+              outputColumnNames: _col0
+              Statistics:
+                  numRows: 8 dataSize: 688 basicStatsState: COMPLETE colStatsState: COMPLETE
+              Union
+                Statistics:
+                    numRows: 8 dataSize: 813 basicStatsState: PARTIAL colStatsState: COMPLETE
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                  outputColumnNames: _col0
+                  Statistics:
+                      numRows: 8 dataSize: 813 basicStatsState: PARTIAL colStatsState: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics:
+                        numRows: 8 dataSize: 813 basicStatsState: PARTIAL colStatsState: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0
+                          columns.types string
+                          escape.delim \
+                          hive.serialization.extend.nesting.levels true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+        Partition
+          base file name: loc_orc
+          input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+          properties:
+            bucket_count -1
+            columns state,locid,zip,year
+            columns.types string:int:bigint:int
+            field.delim |
+#### A masked pattern was here ####
+            name test.loc_orc
+            numFiles 1
+            numPartitions 0
+            numRows 8
+            rawDataSize 796
+            serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+            serialization.format |
+            serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            totalSize 417
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+
+            input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+            output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name test.loc_orc
+              numFiles 1
+              numPartitions 0
+              numRows 8
+              rawDataSize 796
+              serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
+              totalSize 417
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+            name: test.loc_orc
+          name: test.loc_orc
+#### A masked pattern was here ####
+        Partition
+          base file name: loc_staging
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+          properties:
+            bucket_count -1
+            columns state,locid,zip,year
+            columns.types string:int:bigint:int
+            field.delim |
+#### A masked pattern was here ####
+            name test.loc_staging
+            numFiles 1
+            numPartitions 0
+            numRows 0
+            rawDataSize 0
+            serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year}
+            serialization.format |
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 125
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns state,locid,zip,year
+              columns.types string:int:bigint:int
+              field.delim |
+#### A masked pattern was here ####
+              name test.loc_staging
+              numFiles 1
+              numPartitions 0
+              numRows 0
+              rawDataSize 0
+              serialization.ddl struct loc_staging { string state, i32 locid, i64 zip, i32 year}
+              serialization.format |
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 125
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: test.loc_staging
+          name: test.loc_staging
+      Truncated Path -> Alias:
+        /test.db/loc_orc [null-subquery2:temp-subquery2:loc_orc]
+        /test.db/loc_staging [null-subquery1:temp-subquery1:loc_staging]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- will invalidate session level column stats cache
+drop table loc_staging
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: test@loc_staging
+PREHOOK: Output: test@loc_staging
+POSTHOOK: query: -- will invalidate session level column stats cache
+drop table loc_staging
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: test@loc_staging
+POSTHOOK: Output: test@loc_staging
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: drop table loc_orc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: test@loc_orc
+PREHOOK: Output: test@loc_orc
+POSTHOOK: query: drop table loc_orc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: test@loc_orc
+POSTHOOK: Output: test@loc_orc
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: drop database test
+PREHOOK: type: DROPDATABASE
+POSTHOOK: query: drop database test
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: use default
+PREHOOK: type: SWITCHDATABASE
+POSTHOOK: query: use default
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: drop table loc_staging
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@loc_staging
+PREHOOK: Output: default@loc_staging
+POSTHOOK: query: drop table loc_staging
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@loc_staging
+POSTHOOK: Output: default@loc_staging
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+PREHOOK: query: drop table loc_orc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@loc_orc
+PREHOOK: Output: default@loc_orc
+POSTHOOK: query: drop table loc_orc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@loc_orc
+POSTHOOK: Output: default@loc_orc
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
+POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ]
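
Editor's note: the golden plans above show the merge semantics of the new operator-level Statistics annotation: a UNION adds the numRows/dataSize of its inputs, and the combined basicStatsState stays COMPLETE only when every input is COMPLETE; otherwise it degrades to PARTIAL (8/688 COMPLETE merged with 0/125 PARTIAL yields 8/813 PARTIAL in the second plan). The following is a minimal, hypothetical Java sketch of that rule for illustration only; it is not the actual org.apache.hadoop.hive.ql.plan.Statistics API, and all class and field names here are invented.

    // Hypothetical model of the stats-merge rule visible in the plans above.
    public class StatsMergeSketch {

      enum State { NONE, PARTIAL, COMPLETE }

      static final class Stats {
        final long numRows;
        final long dataSize;
        final State basicStatsState;

        Stats(long numRows, long dataSize, State basicStatsState) {
          this.numRows = numRows;
          this.dataSize = dataSize;
          this.basicStatsState = basicStatsState;
        }

        // Union-style merge: sizes add, and the merged state is COMPLETE only
        // when both inputs are COMPLETE; otherwise it weakens to PARTIAL
        // (or stays NONE when neither side has any statistics).
        Stats merge(Stats other) {
          State merged;
          if (basicStatsState == State.COMPLETE && other.basicStatsState == State.COMPLETE) {
            merged = State.COMPLETE;
          } else if (basicStatsState == State.NONE && other.basicStatsState == State.NONE) {
            merged = State.NONE;
          } else {
            merged = State.PARTIAL;
          }
          return new Stats(numRows + other.numRows, dataSize + other.dataSize, merged);
        }

        @Override
        public String toString() {
          return "numRows: " + numRows + " dataSize: " + dataSize
              + " basicStatsState: " + basicStatsState;
        }
      }

      public static void main(String[] args) {
        Stats orcBranch = new Stats(8, 688, State.COMPLETE);     // test.loc_orc select output
        Stats stagingBranch = new Stats(0, 125, State.PARTIAL);  // test.loc_staging select output
        // Prints: numRows: 8 dataSize: 813 basicStatsState: PARTIAL,
        // matching the Union annotation in the second explain plan.
        System.out.println(orcBranch.merge(stagingBranch));
      }
    }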